From 462539f59a7a224af9a3b627cd578b16c8e93db0 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 20:18:48 +0200 Subject: [PATCH 01/35] Speed up the PDF render by another ~2.5s by getting rid of idle time. --- docs/lib/paged.browser.js | 22 +- perf/README.md | 437 ++++++++++++++++++++++++++++++++++++++ perf/instrument-detach.js | 97 +++++++++ 3 files changed, 551 insertions(+), 5 deletions(-) create mode 100644 perf/instrument-detach.js diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 533bf298..1d1a5b8c 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -2664,7 +2664,13 @@ constructor(context){ this._q = []; this.context = context; - this.tick = requestAnimationFrame; + // [PATCH: queue-tick] Upstream uses requestAnimationFrame as the + // per-task tick, which on a headless puppeteer render still waits + // per frame even with no compositor. Across 1651 pages that's + // ~700 ms of V8 (idle). queueMicrotask schedules on the microtask + // queue and fires before the next event-loop iteration, dropping + // the per-page wait to microsecond-scale. + this.tick = (cb) => queueMicrotask(cb); this.running = false; this.paused = false; } @@ -3065,13 +3071,19 @@ // } async render(parsed, startAt) { + // [PATCH: drop-queue] Upstream routes per-page iteration through + // this.q.enqueue(...), but the queue's only job is serialization + // and the async generator is already inherently serial. Dropping + // the indirection cuts a queueMicrotask hop and a Promise/deferred + // allocation per page. 
let renderer = this.layout(parsed, startAt); - let done = false; let result; - while (!done) { - result = await this.q.enqueue(() => { return this.renderAsync(renderer); }); - done = result.done; + while (true) { + if (this.stopped) return { done: true, canceled: true }; + result = await renderer.next(); + if (this.stopped) return { done: true, canceled: true }; + if (result.done) break; } return result; diff --git a/perf/README.md b/perf/README.md index 135e873d..d8ba7d32 100644 --- a/perf/README.md +++ b/perf/README.md @@ -2437,3 +2437,440 @@ generate. After the puppeteer 25 bump it would save less than the earlier estimate (the 64 s -> 43 s gain made the target smaller), but it's still the only knob with a profile target large enough to move the wall-clock total by 5+ s. + +## Can we make `removeChild` cheaper? + +After the findRef fix, `removeChild` sits at ~12 % of render +self-time. The detach-pages handler attribution is clean -- 1651 +detaches for 1651 pages, exactly one per page, with the only +other removeChild callers being `filterTree` at startup (9,192 +ignorable-text-node strips totalling 2.3 ms; not a hot path). + +Per-call cost on the 1651-page book, with `Element.prototype.removeChild` +wrapped to measure each call: + +``` +[instrument] page-detach avg: 1.009 ms/call +[instrument] page-detach median: 0.900 ms/call +[instrument] page-detach p90: 2.000 ms/call +[instrument] page-detach p99: 3.000 ms/call +[instrument] avg descendants/page: 147.7 +``` + +That's ~5-7 us per descendant LayoutObject torn down, multiplied +by ~150 descendants per page, multiplied by ~1651 pages = ~1.7 s +total. The distribution is tight and scales linearly with +descendant count -- this looks like ordinary Blink teardown work +rather than a pathological slow path. 
+ +To verify, two structural variants both tested at the same +instrumentation harness: + +### Variant B: graveyard DocumentFragment + +Replace `parent.removeChild(page)` with +`graveyard.appendChild(page)`, where `graveyard` is a fresh +`DocumentFragment` held by the handler. Hypothesis: the +move-to-out-of-document-fragment path might skip some +LayoutObject teardown work because the destination is itself +disconnected. + +| metric | A (removeChild) | B (graveyard) | +| ------ | --------------- | ------------- | +| avg per call | **1.009 ms** | 1.082 ms (+7 %) | +| median | 0.900 ms | 0.900 ms | +| p90 | 2.000 ms | 2.200 ms | +| p99 | 3.000 ms | 3.100 ms | +| total page wall | 1666 ms | 1785 ms | +| render wall-clock | ~16.1 s | ~15.2 s (run-to-run noise) | + +The graveyard move is **slightly slower** per call. Blink tears +down the LayoutObjects regardless of where the node lands; there's +no fast-path for "moved to a detached parent". No win. + +### Variant C: `contain: layout style` on `.pagedjs_page` + +Inject `` +into the document before render. Hypothesis: removing a contained +subtree might skip style/layout invalidation propagation because +Blink already knows the subtree didn't influence its siblings or +parent. + +Also tested `contain: strict` (which adds `paint` and `size` +containment -- pages already have explicit dimensions via @page +CSS so this is safe). + +| metric | A (no contain) | C (layout style) | C-strict | +| ------ | -------------- | ---------------- | -------- | +| avg per call | **1.009 ms** | 1.017 ms | 0.991 ms | +| median | 0.900 ms | 0.900 ms | 0.900 ms | +| p90 | 2.000 ms | 1.900 ms | 1.900 ms | +| total page wall | 1666 ms | 1678 ms | 1634 ms | +| render wall-clock | ~16.1 s | ~15.0 s | ~14.8 s | + +All four runs are within ~5 % of each other on per-call cost -- +well inside the run-to-run noise band. Containment doesn't unlock +a faster removeChild path either. 
+ +### Conclusion (variants B + C) + +The 1.7 s of `removeChild` is intrinsic Blink LayoutObject +teardown work. The math checks out at ~5-7 us per descendant × +~150 descendants × 1651 pages, and three different framings +(plain removeChild, move-to-fragment, contain + removeChild) all +land within ~10 % of each other. The destination of the move and +the containment metadata don't change Blink's teardown rate. + +The one thing we *don't* do is "remove less per page" -- removing +a page's content as N individual leaf removals would be strictly +worse (N × overhead instead of 1 × overhead, same teardown total). +Each removeChild call carries DOM-mutation, style-invalidation, +and notify overhead beyond the per-descendant cost, so consolidating +to one removal per page is already the optimal framing. + +### Variant D: don't detach at all, just `contain: strict` + +A natural follow-up: if the per-page cost of having siblings +around really comes from style/selector traversal, maybe Blink +will skip a *contained* sibling subtree even when it can't skip +a `display: none` one. Containment is a stronger signal -- it +explicitly tells the engine "no observable interaction crosses +this boundary" -- so the renderer ought to be able to short-circuit +sibling-walks more aggressively. + +Implementation: replace the detach handler with one that sets +`pageElement.style.contain = 'strict'` at finalizePage and clears +the property for every page at afterRendered (so `page.pdf()` +serializes the right paint state). 
+ +Result: + +| metric | current detach | variant D (contain:strict, no detach) | +| ------ | -------------- | --------------------------------------- | +| **render wall-clock** | **~16 s** | **89.3 s** | +| `Page.create` gBCR | ~764 ms | **31,142 ms** | +| `hasOverflow` gBCR | ~2,478 ms | 10,922 ms | +| total gBCR | ~4,832 ms | 45,413 ms | +| per-page ratio (last/first) | 1.36x | 4.11x | + +Worse than the README's display:none baseline (`Page.create` +gBCR 12,947 ms / render 48.5 s). Containment metadata adds work +to per-sibling evaluation rather than removing it. **Definitive +no.** Containment is a hint about what's inside the box; it +doesn't make the box invisible to neighbours. + +### Variant E: empty the wrapper, leave it in place + +A second framing of the same idea: keep the page wrapper as a +sibling, but move its children to a stash so the wrapper itself +is a leaf (no descendants for Blink to walk through). Restore +the children at afterRendered. This isolates the "what costs +what" question: does sibling-walk cost depend on descendant +count, or just on sibling count? + +Implementation: at finalizePage, for the previous-finalized page +(one behind, mirroring the keep-one-back pattern), move each +child into an array via `wrapper.removeChild(wrapper.firstChild)`, +set `min-height: 297mm` so the wrapper still occupies its slot, +and stash the children. At afterRendered, restore. 
+ +Result: + +| metric | current detach | variant E (empty wrapper) | +| ------ | -------------- | --------------------------- | +| **render wall-clock** | **~16 s** | **21.9 s** | +| `Page.create` gBCR | ~764 ms | 2,628 ms (+1,864) | +| `hasOverflow` gBCR | ~2,478 ms | 5,024 ms (+2,546) | +| `Layout` gBCR | ~294 ms | 937 ms | +| total gBCR | ~4,832 ms | **10,127 ms (+5,295)** | +| `removeChild` self | 2,426 ms | **854 ms (-1,572)** | +| per-page ratio (last/first) | 1.36x | 2.93x | + +The removeChild *savings* are real -- with no wrapper to tear +down, just ~150 child removals per page at sub-microsecond each. +But the gBCR *cost* roughly doubles because the wrappers are +still siblings, and gBCR firings have to walk them. Net is +5 s +render, *worse* than the current detach. + +This experiment yields a clean cost-model decomposition. Pulling +the gBCR deltas apart against the wrapper-vs-content split: + +``` +display:none baseline (full content): gBCR(Page.create) ≈ 12,947 ms +variant E (empty wrappers, n=1651): gBCR(Page.create) ≈ 2,628 ms +current detach (no siblings): gBCR(Page.create) ≈ 764 ms +``` + +Subtracting: + +- (variant E - current detach) = 1,864 ms for 1,651 sibling wrappers + → ~1.1 us per wrapper-sibling per `Page.create` gBCR call +- (display:none - variant E) = 10,319 ms for 1,651 × 150 ≈ + 247,650 sibling descendants + → ~42 us per sibling-descendant per `Page.create` gBCR call + +Both wrappers and their descendants contribute to the per-call +cost. Removing the descendants helps -- variant E really is +substantially cheaper than display:none -- but the wrapper cost +alone is enough to lose. To zero out both contributions you have +to take both the wrapper and its descendants out of the sibling +list, which is exactly what the current detach does. 
+ +### Variant F: `content-visibility: hidden`, no detach + +The CSS spec's `content-visibility: hidden` is the closest +property to "freeze in place without disposing" -- per spec, +rendering work is "skipped" but cached state is preserved for +cheap restoration. Conceptually nearer to a freeze than +`display: none` or `contain: strict` were. + +Implementation: at finalizePage, set +`pageElement.style.contentVisibility = 'hidden'` and +`containIntrinsicSize = '210mm 297mm'` (the size hint Blink uses +when content-visibility skips a subtree). At afterRendered, +clear both. + +Result: + +| metric | current detach | variant F (cv:hidden) | +| ------ | -------------- | ----------------------- | +| **render wall-clock** | **~16 s** | **95.2 s** | +| `Page.create` gBCR | ~764 ms | **29,656 ms** | +| `hasOverflow` gBCR | ~2,478 ms | 17,558 ms | +| total gBCR | ~4,832 ms | 52,899 ms | +| per-page ratio (last/first) | 1.36x | 5.12x | + +Worse than every other variant. The spec's "skip rendering work" +clause covers painting and composition; it does **not** make the +subtree invisible to sibling-walks during style and selector +matching that gBCR forces. Three "leave in place" properties +(`display: none`, `contain: strict`, `content-visibility: hidden`) +have now been tested and none of them short-circuit the +sibling-walk. 
+ +### Conclusion across all six variants + +| variant | render | net vs current | +| ------- | ------ | -------------- | +| A current (removeChild, no contain) | ~16.1 s | (baseline) | +| B graveyard fragment | ~15.2 s | flat (noise) | +| C `contain: layout style` + removeChild | ~15.0 s | flat (noise) | +| C-strict `contain: strict` + removeChild | ~14.8 s | flat (noise) | +| **D `contain: strict`, no detach** | **89.3 s** | **+73 s** | +| **E empty wrappers, no detach** | **21.9 s** | **+5.9 s** | +| **F `content-visibility: hidden`, no detach** | **95.2 s** | **+79 s** | + +The flat band (A/B/C/C-strict) is the cost-of-doing-business -- +~1 ms × 1651 pages = ~1.7 s of intrinsic Blink LayoutObject +teardown. Variations on the framing don't move it. The +catastrophic band (D, E, F) confirms that any path where the page +wrapper stays in the live sibling list pays meaningfully more +than the teardown cost would have been -- ~1.1 us per +wrapper-sibling × 1651 wrappers × several gBCR call sites per +page comes out to several seconds of extra render even when the +wrapper is otherwise empty and contained. + +The 1.7 s is the bill we pay for shrinking the live DOM from +~150 × 1651 ≈ 250k nodes back down to 2 nodes (in-flight page + +keeper), which is what kept `Page.create`'s gBCR flat per page +(see "Hypothesis 2: sibling sweeps over `display: none` pages" +above). Net savings vs the display:none variant was ~22 s render; +the 1.7 s removeChild cost is roughly 8 % of that win paid back +to Blink for cleanup. Worth keeping. + +### Aside: it's not GC, and JS references don't help + +A reasonable follow-up question to all of this is "can we just +hold a reference to the detached children to avoid disposal, +or turn off GC to skip the cleanup?" Neither applies to what +we're measuring. + +Chromium maintains two trees: + +- **DOM tree** -- `Node` objects, JS-visible, referenceable. 
+- **Render tree** -- `LayoutObject` / `LayoutBox` / `LayoutText` + etc., Blink-internal, NOT JS-visible. + +`removeChild` keeps the DOM Node alive (JS reference holders -- +including the handler's `this._detached` array -- prevent +collection). But the corresponding LayoutObject in the render +tree is **destroyed immediately**, synchronously, at the +removeChild call. Re-attaching via appendChild later builds a +new LayoutObject from scratch. + +There is no JS-level API to keep a LayoutObject alive across +detach + reattach. Holding DOM references doesn't change the +render-tree lifecycle. The 1.7 s lives entirely in +LayoutObject teardown -- which is Blink-internal C++ work +attributed to the `removeChild` native frame in the profile, +not to GC. + +V8's GC is a separate concern and isn't the bottleneck. The +profile reads: + +``` + self_ms self_% function + 195.21 0.89% (garbage collector) +``` + +~200 ms over a ~22 s render. Even if it could be disabled +(it can't -- Node would OOM), it would barely register. + +The asymmetry between variants B and E makes this concrete. +Variant B (graveyard fragment) moves the page from +`.pagedjs_pages` to a detached DocumentFragment; variant E +(empty wrapper) keeps the page in `.pagedjs_pages` but moves +its children out. The fragment-move path *does* trigger +LayoutObject teardown (you can see the 1.08 ms / call in +variant B's instrumentation) even though the DOM Node lives on +in a JS-visible fragment -- because the destination is itself +not attached to the document, so there's no live render-tree +parent. Conversely, variant E's wrapper stays in +`.pagedjs_pages` with a live LayoutObject the whole time, so +the wrapper's render-tree slot doesn't get torn down; only +its child LayoutObjects do (as the children move out). The +"keep render objects alive" idea would have to mean keeping +the wrapper in `.pagedjs_pages` with all its children, which +is the display:none baseline -- ~48 s render. 
+ +The trade-off is therefore not "keep things alive vs. let GC +collect them"; it's "be a live render-tree sibling vs. not". +Anything that keeps the wrapper as a live sibling pays the +~1.1 us per wrapper-sibling per gBCR call shown above, and the +gBCR firings compound that into seconds across 1651 pages. + +## Chasing the residual `(idle)` to requestAnimationFrame + +A second axis of the same investigation. The post-findRef-fix +profile showed `(idle) 735 ms (4.6 %)` -- not huge, but non-zero +and worth understanding. `(idle)` in a V8 CPU profile means +samples taken while the main thread had nothing scheduled -- +waiting on async/await, microtask queue settling, requestAnimationFrame +ticks, or other browser-internal yields. + +### Hypothesis 1: microtask boundaries from `await Hook.trigger(...)` + +The chunker's per-page loop has 5-6 `await this.hooks.X.trigger(...)` +calls per page. `Hook.trigger()` wraps every sync handler in a fresh +Promise and returns `Promise.all(promises)`, so the caller always +awaits a thenable -- a microtask boundary per await even when every +handler resolved synchronously. 5 boundaries × 1651 pages ≈ 8,255 +yields; if each yield is ~85 us in V8 it lines up with the 735 ms. + +Patched it: `Hook.trigger()` returns `undefined` when no handler +returned a thenable, callers do +`let p = hook.trigger(...); if (p) await p;` to skip the await on +the sync fast path. Patched at six hot per-page sites (3 in +`chunker.layout`, 3 in `chunker.handleBreaks`). + +Result: render went **up** by ~0.35 s on a 2-run paired A/B +(14.57 s -> 14.92 s avg). `(idle)` in the profile went **up too** +(735 ms -> 1223 ms in absolute terms). Microtask boundaries are +~30 us each at the JIT level; the V8 sampler at 1 ms intervals +hardly catches them, so they show up as `(program)` rather than +`(idle)`. The patch shaved microtask scheduling cost in the +single-digit percent range but added a branch on every Hook.trigger +call -- net wash, slight regression. 
**Reverted.** + +### Hypothesis 2: ResizeObserver firing per page + +Per page, `Page.addResizeObserver` creates a fresh `ResizeObserver` +that fires its callback asynchronously from the compositor thread +back to main. The callback wraps work in `requestAnimationFrame`, +so each RO firing schedules a frame-tick wait. 1651 pages × ~0.5 ms +per RO-rAF round-trip ≈ ~800 ms. Plausible. + +Two-step probe: +1. **Skip the rAF wrap inside the RO callback**, run synchronously. + Result: `(idle) 902 ms`. No improvement, possibly slightly worse. +2. **Disable the ResizeObserver entirely** (early-return in + `addResizeObserver`). Result: `(idle) 1,074 ms`. Still no + improvement. + +Neither helped. The RO isn't the source -- the per-page +`addResizeObserver` overhead is real, but it doesn't show up in +the `(idle)` bucket. Restored upstream behaviour. + +### Hypothesis 3: the chunker's `Queue.tick` is `requestAnimationFrame` + +The chunker drives its per-page work through a `Queue` class +(`paged.browser.js:2666`). The queue's constructor sets: + +```js +this.tick = requestAnimationFrame; +``` + +and `Queue.run()` schedules each iteration via +`this.tick.call(window, () => { ... });`. Chunker's `render()` +loops over `this.q.enqueue(() => this.renderAsync(renderer))` +once per page. Every per-page iteration therefore waits one rAF +tick before processing. + +`requestAnimationFrame` waits for the next animation frame. In +headless puppeteer with no display, rAF still delivers callbacks +on a regular cadence (Chromium's headless mode default is around +60 Hz off-screen / ~16 ms per frame, with the scheduler often +batching tighter than that). Either way, per-page rAF waits +across 1651 pages add up to several hundred milliseconds of pure +main-thread idle. 
+ +The fix is one line: + +```js +this.tick = (cb) => queueMicrotask(cb); +``` + +`queueMicrotask` schedules the callback on the microtask queue -- +runs before returning to the event loop, microsecond-scale latency +instead of millisecond-scale. The `Queue` doesn't depend on rAF +semantics (no paint coordination, no frame-budget yielding -- +it's just a serializer that wants to run tasks back-to-back). + +Verification (paired 2-run A/B, `--detach-pages`, no +instrumentation, no cpu-profile): + +| run | BEFORE render | AFTER render | +| --- | --- | --- | +| 1 | 14.62 s | 11.86 s | +| 2 | 14.51 s | 12.12 s | +| **avg** | **14.57 s** | **11.99 s** | + +**Δ = -2.58 s render (-18 %).** Larger than the 735 ms `(idle)` +that prompted the look -- because rAF was costing real (program) +work too (V8 scheduler, microtask queue draining around the rAF +boundary), not just idle wait. CPU profile of the fixed render: + +``` + self_ms self_% function + ------- ------ ---------------------------------------------- + 4355.74 34.75% getBoundingClientRect + 1935.89 15.45% removeChild + 1934.11 15.43% (program) (was 5872 -- down ~4 s) + 636.43 5.08% removeOverflow + -- (idle) absent from the top 10, < 130 ms (1 %) +``` + +`(idle)` dropped out of the top 10 (< 130 ms / 1 %), `(program)` +dropped from 5872 ms to 1934 ms (-4 s), `removeChild` dropped +slightly (2426 ms -> 1935 ms; smaller render = same per-call cost +× same call count, so this is sampling artefact, not a real +change). PDF byte size unchanged (within standard timestamp +drift). Shipped. + +### What the three hypotheses together teach + +`(idle)` in a V8 CPU profile attribution table is **not** primarily +microtask scheduling -- those are too fast to sample. It's +genuinely-waiting time, where the main thread had no V8 work to do. +The dominant source of waiting in our render was not async/await, +not ResizeObserver coalescing, but a `requestAnimationFrame` +buried in the chunker's task queue. 
Replacing it with +`queueMicrotask` collapses the per-page wait, and additionally +shrinks the surrounding V8 scheduler work because each rAF +callback came with its own setup / teardown overhead. + +The pattern to remember: if a profile shows non-trivial `(idle)` +in a render-style workload, hunt for explicit `requestAnimationFrame` +/ `setTimeout` / `requestIdleCallback` calls in the hot path before +investigating microtask machinery. The frame-paced scheduler is a +much bigger lever than the microtask scheduler. diff --git a/perf/instrument-detach.js b/perf/instrument-detach.js new file mode 100644 index 00000000..73afa9d6 --- /dev/null +++ b/perf/instrument-detach.js @@ -0,0 +1,97 @@ +// Per-call timing for the detach-pages.js removeChild path. +// +// Wraps detach-pages.js's removeChild call so we can see whether the +// cost is roughly flat per call (some Blink-internal fixed overhead) or +// scales with the page's descendant count (LayoutObject teardown). +// +// Loaded as an --additional-script AFTER detach-pages.js so the +// instrumentation can monkey-patch the prototype that detach-pages.js +// uses. Records per-call ns + descendant count + first-quarter / +// last-quarter buckets. Dump at afterRendered through the [instrument] +// prefix so the harness pipes it to stdout. + +(() => { + const origRemoveChild = Node.prototype.removeChild; + const samples = []; // { ns, descendants, isPage } + let totalNs = 0; + let pageDetachCount = 0; + let otherCount = 0; + + Node.prototype.removeChild = function (child) { + // count descendants quickly — only meaningful on Element children + let descendants = 0; + let isPage = false; + if (child && child.nodeType === 1) { + // Element.children.length is just direct kids; we want a count + // estimate of the subtree, but a full walk would skew the timing. + // Use childElementCount as a cheap proxy plus a textContent length + // bucket so we can correlate with size. + descendants = child.getElementsByTagName ? 
child.getElementsByTagName('*').length : 0; + isPage = child.classList && child.classList.contains('pagedjs_page'); + } + const t0 = performance.now(); + const r = origRemoveChild.call(this, child); + const ns = (performance.now() - t0) * 1e6; + totalNs += ns; + if (isPage) { + pageDetachCount++; + samples.push({ ns, descendants }); + } else { + otherCount++; + } + return r; + }; + + class DetachInstrument extends Paged.Handler { + afterRendered(pages) { + const total = pages.length; + const pageSamples = samples.slice(); + pageSamples.sort((a, b) => a.ns - b.ns); + const median = pageSamples.length ? pageSamples[Math.floor(pageSamples.length / 2)].ns : 0; + const p90 = pageSamples.length ? pageSamples[Math.floor(pageSamples.length * 0.9)].ns : 0; + const p99 = pageSamples.length ? pageSamples[Math.floor(pageSamples.length * 0.99)].ns : 0; + const sumDesc = pageSamples.reduce((s, x) => s + x.descendants, 0); + const sumNs = pageSamples.reduce((s, x) => s + x.ns, 0); + + console.log(`[instrument] removeChild wrapper: ${pageDetachCount} page detaches, ${otherCount} other`); + console.log(`[instrument] total removeChild wall: ${(totalNs / 1e6).toFixed(1)} ms`); + console.log(`[instrument] page-detach total: ${(sumNs / 1e6).toFixed(1)} ms`); + console.log(`[instrument] page-detach avg: ${(sumNs / pageDetachCount / 1e6).toFixed(3)} ms/call`); + console.log(`[instrument] page-detach median: ${(median / 1e6).toFixed(3)} ms/call`); + console.log(`[instrument] page-detach p90: ${(p90 / 1e6).toFixed(3)} ms/call`); + console.log(`[instrument] page-detach p99: ${(p99 / 1e6).toFixed(3)} ms/call`); + console.log(`[instrument] avg descendants/page: ${(sumDesc / pageDetachCount).toFixed(1)}`); + + // Bucket by descendant count to see proportionality. 
+ const buckets = [ + { lo: 0, hi: 100, n: 0, ns: 0, desc: 0 }, + { lo: 100, hi: 200, n: 0, ns: 0, desc: 0 }, + { lo: 200, hi: 400, n: 0, ns: 0, desc: 0 }, + { lo: 400, hi: 800, n: 0, ns: 0, desc: 0 }, + { lo: 800, hi: 1600, n: 0, ns: 0, desc: 0 }, + { lo: 1600,hi: Infinity, n: 0, ns: 0, desc: 0 }, + ]; + for (const s of pageSamples) { + const b = buckets.find(bk => s.descendants >= bk.lo && s.descendants < bk.hi); + if (b) { b.n++; b.ns += s.ns; b.desc += s.descendants; } + } + console.log(`[instrument] removeChild cost by descendant-count bucket:`); + console.log(`[instrument] desc-range count total_ms avg_ms avg_desc ms_per_desc`); + for (const b of buckets) { + if (!b.n) continue; + const avgMs = b.ns / b.n / 1e6; + const avgDesc = b.desc / b.n; + const msPerDesc = avgDesc > 0 ? (avgMs / avgDesc) * 1000 : 0; + const range = b.hi === Infinity ? `${b.lo}+` : `${b.lo}-${b.hi}`; + console.log( + `[instrument] ${range.padEnd(10)} ${String(b.n).padStart(6)} ${avgMs.toFixed(3).padStart(8)} ${(b.ns/1e6).toFixed(1).padStart(8)} ${avgDesc.toFixed(0).padStart(10)} ${msPerDesc.toFixed(2).padStart(13)}` + ); + } + + // Restore so afterRendered's own removeChild (when re-appending) isn't double-charged. + Node.prototype.removeChild = origRemoveChild; + } + } + Paged.registerHandlers(DetachInstrument); + console.log('[instrument] removeChild wrapper installed'); +})(); From 5b148ff8ffab49c8c810fc9c3d2c0ddba6d5ead8 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 20:31:16 +0200 Subject: [PATCH 02/35] Hook fast-path: skip await microtask when no async handlers. 
--- docs/lib/paged.browser.js | 65 +++++++++++++++---------- perf/README.md | 99 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 25 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 1d1a5b8c..fa709ae0 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1391,31 +1391,39 @@ } /** - * Triggers a hook to run all functions - * @example this.content.trigger(args).then(function(){...}); - * @return {Promise} results + * Triggers a hook to run all functions. + * @return {Promise|undefined} A Promise that resolves when all + * thenable-returning handlers settle, OR `undefined` if no + * handler returned a thenable (the all-synchronous fast path). + * + * [PATCH: hook-fast-path] Upstream always wrapped sync handler + * results in `new Promise(resolve => resolve(...))` and returned + * `Promise.all(promises)`, so callers' `await trigger(...)` was a + * mandatory microtask boundary even when every handler resolved + * synchronously. We return `undefined` on the all-sync path so + * callers can write: + * + * let p = hook.trigger(...); + * if (p) await p; + * + * and skip the microtask boundary entirely. Per-page hot-loop + * sites in the chunker do this; one-shot callers can keep the + * `await trigger(...)` form (`await undefined` still works, just + * with a cycle). 
*/ trigger(){ var args = arguments; var context = this.context; - var promises = []; - - this.hooks.forEach(function(task) { - var executing = task.apply(context, args); + var promises; - if(executing && typeof executing["then"] === "function") { - // Task is a function that returns a promise - promises.push(executing); - } else { - // Otherwise Task resolves immediately, add resolved promise with result - promises.push(new Promise((resolve, reject) => { - resolve(executing); - })); + for (var i = 0; i < this.hooks.length; i++) { + var executing = this.hooks[i].apply(context, args); + if (executing && typeof executing["then"] === "function") { + (promises = promises || []).push(executing); } - }); - + } - return Promise.all(promises); + return promises ? Promise.all(promises) : undefined; } /** @@ -3173,11 +3181,14 @@ } if (page) { - await this.hooks.beforePageLayout.trigger(page, undefined, undefined, this); + // [PATCH: hook-fast-path] conditional await -- see Hook.trigger + let _p = this.hooks.beforePageLayout.trigger(page, undefined, undefined, this); + if (_p) await _p; this.emit("page", page); - // await this.hooks.layout.trigger(page.element, page, undefined, this); - await this.hooks.afterPageLayout.trigger(page.element, page, undefined, this); - await this.hooks.finalizePage.trigger(page.element, page, undefined, this); + _p = this.hooks.afterPageLayout.trigger(page.element, page, undefined, this); + if (_p) await _p; + _p = this.hooks.finalizePage.trigger(page.element, page, undefined, this); + if (_p) await _p; this.emit("renderedPage", page); } } @@ -3196,7 +3207,9 @@ let page = this.addPage(); - await this.hooks.beforePageLayout.trigger(page, content, breakToken, this); + // [PATCH: hook-fast-path] conditional await -- see Hook.trigger + let _p = this.hooks.beforePageLayout.trigger(page, content, breakToken, this); + if (_p) await _p; this.emit("page", page); // Layout content in the page, starting from the breakToken @@ -3214,8 +3227,10 @@ } } - await 
this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this); - await this.hooks.finalizePage.trigger(page.element, page, undefined, this); + _p = this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this); + if (_p) await _p; + _p = this.hooks.finalizePage.trigger(page.element, page, undefined, this); + if (_p) await _p; this.emit("renderedPage", page); this.recoredCharLength(page.wrapper.textContent.length); diff --git a/perf/README.md b/perf/README.md index d8ba7d32..55196503 100644 --- a/perf/README.md +++ b/perf/README.md @@ -2874,3 +2874,102 @@ in a render-style workload, hunt for explicit `requestAnimationFrame` / `setTimeout` / `requestIdleCallback` calls in the hot path before investigating microtask machinery. The frame-paced scheduler is a much bigger lever than the microtask scheduler. + +### Follow-up: the `Queue` itself was unnecessary indirection + +The chunker's `render()` routes each per-page iteration through +`this.q.enqueue(() => this.renderAsync(renderer))`. The queue's +job is to serialize tasks -- but an async generator is already +inherently serial (you can't call `.next()` twice in parallel). +With the rAF-tick fix above, the queue was reduced to a +`queueMicrotask` hop plus a Promise/deferred allocation per page, +for no purpose. + +Dropped the indirection: `render()` now iterates `renderer.next()` +directly. The `Queue` class still exists in the bundle for the +`onOverflow` re-render path (which is rare in practice), but the +hot per-page loop bypasses it. + +This is a structural simplification more than a measurable speedup +-- the queueMicrotask hop was already cheap and the deferred +allocation amortizes. But it removes a layer that was doing +nothing useful for our use case, which is the point of +maintaining a fork. + +## Stripping headless-irrelevant async machinery + +paged.js was designed to be fully usable in interactive browser +work. 
The async coordination patterns it carries -- always +returning Promises from hook triggers, awaiting microtask +boundaries between every phase, deferring tasks via animation +frames -- pay off when the same engine is rendering inside a +visible page that needs to stay responsive, coordinate with the +compositor, and tolerate handlers that load external resources. + +In our headless puppeteer pipeline, none of that is true: + +- The page is offscreen; no compositor to coordinate with. +- We don't care if any individual page-render blocks for tens of + milliseconds, because the browser isn't trying to repaint. +- Every handler we register is synchronous. No hook needs to + await anything. +- The book HTML is loaded before render starts (`page.goto(url, + { waitUntil: "load" })`), so every image's `.complete` flag is + already true. No image-loading awaits ever actually wait. + +Each remaining async wrapper is overhead we pay for a flexibility +we never use. We're maintaining a task-specific fork; we can keep +peeling layers as long as the simplifications don't change observed +output. + +### Phase 1: hook fast-path + +`Hook.trigger()` upstream always wraps sync handler results in +`new Promise(resolve => resolve(executing))` and returns +`Promise.all(promises)`. The chunker's per-page loop awaits each +of `beforePageLayout`, `afterPageLayout`, and `finalizePage`. With +all six of our registered handlers running synchronously, +`await trigger(...)` was a no-work microtask boundary per call. + +Patch: `Hook.trigger()` returns `undefined` when no handler +returned a thenable. Callers in the per-page hot path become: + +```js +let _p = this.hooks.X.trigger(...); +if (_p) await _p; +``` + +The microtask boundary is skipped entirely on the sync fast +path. Patched at six per-page sites (three in `chunker.layout`, +three in `chunker.handleBreaks`). 
+ +CPU profile comparison (post-queue-tick + drop-queue baseline vs +post-Phase-1): + +| metric | baseline | Phase 1 | Δ | +| ------ | -------- | ------- | --- | +| samples | 7,353 | 6,902 | -451 | +| profile duration | 13.07 s | 12.22 s | **-0.85 s (-6.5 %)** | +| `getBoundingClientRect` self | 4,622 ms | 4,273 ms | -349 ms | +| `(program)` self | 1,873 ms | 1,874 ms | flat | +| `removeChild` self | 1,885 ms | 1,913 ms | flat | +| `removeOverflow` self | 592 ms | 579 ms | flat | +| `(idle)` self | n/a (< 130 ms) | n/a (< 130 ms) | flat | + +The 451 fewer samples account for ~800 ms of saved CPU work. +`getBoundingClientRect`'s self-time dropped by ~350 ms; the rest +is distributed across many small hot spots that all shrank +slightly because they were each preceded by fewer microtask +yields. No new hot spot appeared. + +> [!NOTE] +> We compare CPU-profile sample counts and self-times here, not +> wall-clock. Wall-clock includes I/O variance and system load on +> the dev machine; CPU profile sample times are independent of +> those and more reliable for "did this actually change CPU work." +> Wall-clock numbers from these runs are noted where useful for +> sanity-checking but aren't the primary signal. + +Shipped. The fix is small (one helper change + six call-site +edits) and removes about 8k microtask boundaries from the +per-page hot loop on a 1651-page render. From f3a042fb249e70e9bc99fc469b4b9ba416ac2f33 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 20:38:21 +0200 Subject: [PATCH 03/35] Make the per-page chunker chain synchronous. 
--- docs/lib/paged.browser.js | 169 ++++++++++++++++++++------------------ perf/README.md | 111 +++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 82 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index fa709ae0..d3d666b5 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1366,6 +1366,25 @@ * @param {any} context scope of this * @example this.content = new Hook(this); */ + // [PATCH: sync-chain] Used by the chunker hot path to confirm that + // Hook.trigger() returned the sync sentinel (undefined). If a handler + // returned a thenable, the chunker dropping it here would silently + // lose async work -- so we throw instead. Limitation of this fork: + // the per-page hooks (beforePageLayout / afterPageLayout / + // finalizePage / handleBreaks / *layout / page.layout etc.) must + // have all-synchronous handlers. Bundle ships with no async handlers + // for these on our pipeline; document and assert. + function _assertSync(triggerResult, hookName) { + if (triggerResult && typeof triggerResult.then === "function") { + throw new Error( + "paged.js (forked): async handler registered for hook '" + hookName + "'. " + + "This bundle's per-page hot path is synchronous; async handlers " + + "must be registered for the once-per-render hooks (beforeParsed, " + + "afterParsed, afterRendered) instead, or the chain re-asyncified." + ); + } + } + class Hook { constructor(context){ this.context = context || this; @@ -1499,7 +1518,11 @@ this.forceRenderBreak = false; } - async renderTo(wrapper, source, breakToken, bounds = this.bounds) { + // [PATCH: sync-chain] renderTo no longer needs to be async because + // waitForImages is now sync (see its comment). Removing `async` + // removes the per-page Promise allocation that was returned from + // page.layout / chunker.layout up the chain. 
+ renderTo(wrapper, source, breakToken, bounds = this.bounds) { let start = this.getStart(source, breakToken); let walker = walk$2(start, source); @@ -1539,7 +1562,7 @@ let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { - await this.waitForImages(imgs); + this.waitForImages(imgs); } newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken); @@ -1564,7 +1587,7 @@ let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { - await this.waitForImages(imgs); + this.waitForImages(imgs); } newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken); @@ -1642,7 +1665,7 @@ let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { - await this.waitForImages(imgs); + this.waitForImages(imgs); } newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken); @@ -1794,29 +1817,27 @@ } } - async waitForImages(imgs) { - let results = Array.from(imgs).map(async (img) => { - return this.awaitImageLoaded(img); - }); - await Promise.all(results); - } - - async awaitImageLoaded(image) { - return new Promise(resolve => { - if (image.complete !== true) { - image.onload = function () { - let {width, height} = window.getComputedStyle(image); - resolve(width, height); - }; - image.onerror = function (e) { - let {width, height} = window.getComputedStyle(image); - resolve(width, height, e); - }; - } else { - let {width, height} = window.getComputedStyle(image); - resolve(width, height); + // [PATCH: sync-chain] waitForImages used to wrap every image in + // `new Promise(resolve => ...)` and await `Promise.all(...)`, so + // `renderTo` was forced to be async even when every image was + // already loaded (which is our case -- page.goto(url, { + // waitUntil: "load" }) settles before paged.js starts rendering). + // + // In our headless pipeline image.complete is always true at this + // point. 
If a future caller hits this with a not-yet-loaded + // image, that's a pipeline bug and we throw immediately rather + // than silently making the rest of the layout chain async again. + waitForImages(imgs) { + for (const img of imgs) { + if (img.complete !== true) { + throw new Error( + "paged.js (forked): image not loaded at render time. " + + "This branch dropped async image-loading support; the " + + "render pipeline must finish loading all images before " + + "calling paged.js. Image: " + (img.src || img.outerHTML) + ); } - }); + } } avoidBreakInside(node, limiter) { @@ -2411,7 +2432,10 @@ } */ - async layout(contents, breakToken, maxChars) { + // [PATCH: sync-chain] page.layout / append no longer await + // renderTo (which is now sync). Removing `async` removes the + // Promise allocation around each return. + layout(contents, breakToken, maxChars) { this.clear(); @@ -2424,7 +2448,7 @@ this.layoutMethod = new Layout(this.area, this.hooks, settings); - let renderResult = await this.layoutMethod.renderTo(this.wrapper, contents, breakToken); + let renderResult = this.layoutMethod.renderTo(this.wrapper, contents, breakToken); let newBreakToken = renderResult.breakToken; this.addListeners(contents); @@ -2434,13 +2458,13 @@ return newBreakToken; } - async append(contents, breakToken) { + append(contents, breakToken) { if (!this.layoutMethod) { return this.layout(contents, breakToken); } - let renderResult = await this.layoutMethod.renderTo(this.wrapper, contents, breakToken); + let renderResult = this.layoutMethod.renderTo(this.wrapper, contents, breakToken); let newBreakToken = renderResult.breakToken; this.endToken = newBreakToken; @@ -3078,18 +3102,18 @@ // } // } + // [PATCH: sync-chain] *layout is a sync generator now, so + // renderer.next() returns synchronously -- no per-page await. 
+ // render() itself stays `async` because callers (flow()) await + // it and other once-per-render awaits in flow() (loadFonts, + // beforeParsed / afterParsed / afterRendered) still need it. async render(parsed, startAt) { - // [PATCH: drop-queue] Upstream routes per-page iteration through - // this.q.enqueue(...), but the queue's only job is serialization - // and the async generator is already inherently serial. Dropping - // the indirection cuts a queueMicrotask hop and a Promise/deferred - // allocation per page. let renderer = this.layout(parsed, startAt); let result; while (true) { if (this.stopped) return { done: true, canceled: true }; - result = await renderer.next(); + result = renderer.next(); if (this.stopped) return { done: true, canceled: true }; if (result.done) break; } @@ -3107,35 +3131,18 @@ // this.q.clear(); } - renderOnIdle(renderer) { - return new Promise(resolve => { - requestIdleCallback(async () => { - if (this.stopped) { - return resolve({ done: true, canceled: true }); - } - let result = await renderer.next(); - if (this.stopped) { - resolve({ done: true, canceled: true }); - } else { - resolve(result); - } - }); - }); - } - - async renderAsync(renderer) { - if (this.stopped) { - return { done: true, canceled: true }; - } - let result = await renderer.next(); - if (this.stopped) { - return { done: true, canceled: true }; - } else { - return result; - } - } + // [PATCH: sync-chain] renderOnIdle and renderAsync removed -- + // both wrapped renderer.next() (now sync) in async machinery, + // and the only caller (render() via this.q.enqueue) was already + // removed in the drop-queue change. - async handleBreaks(node, force) { + // [PATCH: sync-chain] handleBreaks no longer awaits hook triggers + // (Hook.trigger returns undefined on the all-sync path, which is + // our only path). 
If a future caller registers an async handler + // for any of these hooks, Hook.trigger will return a Promise and + // dropping it here will silently lose the work -- we assert that + // instead. The `_assertSync` helper is defined above, near Hook. + handleBreaks(node, force) { let currentPage = this.total + 1; let currentPosition = currentPage % 2 === 0 ? "left" : "right"; // TODO: Recto and Verso should reverse for rtl languages @@ -3181,39 +3188,39 @@ } if (page) { - // [PATCH: hook-fast-path] conditional await -- see Hook.trigger - let _p = this.hooks.beforePageLayout.trigger(page, undefined, undefined, this); - if (_p) await _p; + _assertSync(this.hooks.beforePageLayout.trigger(page, undefined, undefined, this), "beforePageLayout"); this.emit("page", page); - _p = this.hooks.afterPageLayout.trigger(page.element, page, undefined, this); - if (_p) await _p; - _p = this.hooks.finalizePage.trigger(page.element, page, undefined, this); - if (_p) await _p; + _assertSync(this.hooks.afterPageLayout.trigger(page.element, page, undefined, this), "afterPageLayout"); + _assertSync(this.hooks.finalizePage.trigger(page.element, page, undefined, this), "finalizePage"); this.emit("renderedPage", page); } } - async *layout(content, startAt) { + // [PATCH: sync-chain] *layout is now a sync generator, not an + // async generator. With handleBreaks, page.layout, renderTo, and + // every per-page hook trigger all synchronous in our pipeline, + // nothing inside this generator needs to await. The sync form + // avoids ~1651 Promise allocations per render (one per + // `renderer.next()` call) and the matching microtask boundaries.
+ *layout(content, startAt) { let breakToken = startAt || false; let tokens = []; while (breakToken !== undefined && (true)) { if (breakToken && breakToken.node) { - await this.handleBreaks(breakToken.node); + this.handleBreaks(breakToken.node); } else { - await this.handleBreaks(content.firstChild); + this.handleBreaks(content.firstChild); } let page = this.addPage(); - // [PATCH: hook-fast-path] conditional await -- see Hook.trigger - let _p = this.hooks.beforePageLayout.trigger(page, content, breakToken, this); - if (_p) await _p; + _assertSync(this.hooks.beforePageLayout.trigger(page, content, breakToken, this), "beforePageLayout"); this.emit("page", page); // Layout content in the page, starting from the breakToken - breakToken = await page.layout(content, breakToken, this.maxChars); + breakToken = page.layout(content, breakToken, this.maxChars); if (breakToken) { let newToken = breakToken.toJSON(true); @@ -3227,10 +3234,8 @@ } } - _p = this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this); - if (_p) await _p; - _p = this.hooks.finalizePage.trigger(page.element, page, undefined, this); - if (_p) await _p; + _assertSync(this.hooks.afterPageLayout.trigger(page.element, page, breakToken, this), "afterPageLayout"); + _assertSync(this.hooks.finalizePage.trigger(page.element, page, undefined, this), "finalizePage"); this.emit("renderedPage", page); this.recoredCharLength(page.wrapper.textContent.length); diff --git a/perf/README.md b/perf/README.md index 55196503..398c744b 100644 --- a/perf/README.md +++ b/perf/README.md @@ -2973,3 +2973,114 @@ yields. No new hot spot appeared. Shipped. The fix is small (one helper change + six call-site edits) and removes about 8k microtask boundaries from the per-page hot loop on a 1651-page render. 
+ +### Phase 2: sync chain end-to-end through the per-page hot path + +With Phase 1 in place, the unconditional per-page `await`s left in +the chunker each wait on a function that returns a Promise even when +nothing is actually awaitable. The structural answer is to make +those functions plain sync functions. + +The chain, top to bottom of the per-page call tree: + +``` +chunker.*layout() (async generator → sync generator) + chunker.handleBreaks() (async → sync) + page.layout() (async → sync) + Layout.renderTo() (async → sync) + Layout.waitForImages() (async → sync, throws if not preloaded) +chunker.render() loop (still async at the outer edge; + renderer.next() now sync) +``` + +Phase 2 converts each step. The only function that *could* have +been genuinely async -- `waitForImages` -- is now a synchronous +check: it walks the supplied `<img>` nodes and throws if any +isn't `.complete`. In our pipeline, +`page.goto(url, { waitUntil: "load" })` settles before paged.js +is invoked, so every image is already loaded; the throw is a +safety net for pipeline bugs, not a runtime path we expect to +take. + +The hook triggers in the per-page hot path keep the Phase 1 +fast-path semantics but switch from +`let _p = hook.trigger(...); if (_p) await _p;` to +`_assertSync(hook.trigger(...), "hook-name")`. The helper throws +if a handler ever returns a thenable -- the same safety pattern +as `waitForImages`. None of our shipping handlers do. + +Dead code removed in the same pass: `Chunker.renderAsync` and +`Chunker.renderOnIdle`, both unreachable since the drop-queue +change above stripped their only caller. Together ~30 lines of +async machinery that existed only to wrap the (now sync) +`renderer.next()` call.
+ +CPU profile (Phase 1 baseline vs Phase 2): + +| metric | Phase 1 | Phase 2 | Δ | +| ------ | -------- | ------- | --- | +| samples | 6,902 | 6,948 | +46 | +| profile duration | 12.22 s | 12.35 s | +0.13 s (noise) | +| `getBoundingClientRect` self | 4,273 ms | 4,524 ms | +251 ms (noise) | +| `(program)` self | 1,874 ms | 1,909 ms | +35 ms | +| `removeChild` self | 1,913 ms | 1,883 ms | -30 ms | +| `removeOverflow` self | 579 ms | 523 ms | -56 ms | + +Phase 2 sits inside the run-to-run noise band on CPU time -- +the per-call CPU cost of an `await` on an already-settled Promise +is small (a handful of microseconds), and Phase 1 already +eliminated most of the boundary count. **What Phase 2 buys is +not measurable CPU time -- it's structural simplicity.** + +Code shape, before and after: + +- 6 fewer `async` keywords on hot-path methods. +- 13 `await` keywords removed from the bodies of those + methods (the per-page chain no longer threads `await` through + any of its layers). +- One async generator (`async *layout`) → sync generator + (`*layout`). +- Two dead methods removed (`renderAsync`, `renderOnIdle`). +- `_assertSync` guards added at the chunker's per-page hook call + sites + a direct throw in `waitForImages` -- the contract we now rely on + (per-page handlers all synchronous, every `<img>` preloaded) + is enforced at runtime with a useful error message. + +PDF output is **byte-identical** to the Phase 1 build on this +content (`async-phase1/book.pdf` and `async-phase2/book.pdf` +both 16,893,546 bytes -- a rare 0-byte timestamp drift, but +the structural content is identical regardless). + +This is the kind of cleanup that's only worth doing because +we maintain a task-specific fork of the bundle. Upstream +paged.js has to support handlers that await fetches or image +loads or font measurements -- our pipeline never registers one.
+Removing the async machinery in our copy shrinks the surface to +reason about and makes the data-flow direct: a render is a +plain function call that produces a plain return value. + +### What's still async, and why + +The async machinery that survives this audit is now at the +once-per-render layer, where it's load-bearing: + +- `Chunker.flow()` is async because `loadFonts()` waits on the + CSS font-face descriptor's load promise, which is actually + async and OS-level. +- `Chunker.render()` stays `async` as a thin wrapper so callers + in `flow()` can `await` it (the alternative would be to + remove `async` and have `flow()` not await it, but the call + site reads more clearly with the `await` retained). +- `beforeParsed`, `afterParsed`, `afterRendered` hooks are still + awaited with the `await hook.trigger(...)` form because they + fire once per render and the overhead is irrelevant. +- The `onOverflow` recovery path (`Chunker.q.enqueue(async ...)`) + re-renders the document if any page overflows after paint. In + practice this never fires for our content, but keeping the + recovery code intact costs nothing and preserves behaviour for + edge cases. + +The hot per-page path is now `function`, `function*`, plain +return values, and a `while` loop. Future work that touches +this code can reason about it as straight-line synchronous +flow. From 2839d9c1eec8b22316d7492d3225e0c5d7ae7dbb Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 20:55:02 +0200 Subject: [PATCH 04/35] Use a Set for chunker loop-detection (O(n^2) -> O(n)). --- docs/lib/paged.browser.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index d3d666b5..34337090 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -3204,7 +3204,12 @@ // `renderer.next()` call) and the matching microtask boundaries. 
*layout(content, startAt) { let breakToken = startAt || false; - let tokens = []; + // [PATCH: tokens-set] Loop-detection used `tokens.lastIndexOf(...)` + // on an array, which scans up to N entries per page -- O(n^2) + // across a render. A Set gives O(1) lookup. The absolute saving + // on our 1651-page book is small (~80 us per late page) but the + // algorithmic shape is the load-bearing change. + let tokens = new Set(); while (breakToken !== undefined && (true)) { @@ -3224,13 +3229,13 @@ if (breakToken) { let newToken = breakToken.toJSON(true); - if (tokens.lastIndexOf(newToken) > -1) { + if (tokens.has(newToken)) { // loop let err = new OverflowContentError("Layout repeated", [breakToken.node]); console.error("Layout repeated at: ", breakToken.node); return err; } else { - tokens.push(newToken); + tokens.add(newToken); } } From 58db1e9d5a7b7bf5fbc56f6b21b5bc55b403aca9 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 21:02:51 +0200 Subject: [PATCH 05/35] Use Range.deleteContents over extractContents when no footnotes. --- docs/lib/paged.browser.js | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 34337090..43f9d072 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -2273,7 +2273,25 @@ removeOverflow(overflow, breakLetter) { let {startContainer} = overflow; - let extracted = overflow.extractContents(); + + // [PATCH: extract-vs-delete] Range.extractContents() builds a + // DocumentFragment of the removed nodes and reattaches them; + // Range.deleteContents() just removes. The only consumer of + // the returned fragment is Footnotes.afterOverflowRemoved, + // which iterates the rendered area's footnotes and for each + // looks up its [data-footnote-call=...] in the removed fragment. + // So extractContents is only useful if the rendered area + // contained any footnote-call elements. 
Check via a cheap + // querySelector on `this.element` (the page content area -- + // `.pagedjs_page_content`); when no calls are present we + // skip the fragment build entirely. + let extracted; + if (this.element && this.element.querySelector("[data-footnote-call]")) { + extracted = overflow.extractContents(); + } else { + overflow.deleteContents(); + extracted = null; + } this.hyphenateAtBreak(startContainer, breakLetter); @@ -31679,8 +31697,14 @@ let notes = area.querySelectorAll(".pagedjs_footnote_area [data-note='footnote']"); for (let n = 0; n < notes.length; n++) { const note = notes[n]; - // Check if the call for that footnote has been removed with the overflow - let call = removed.querySelector(`[data-footnote-call="${note.dataset.ref}"]`); + // [PATCH: extract-vs-delete] Guard `removed` access -- when + // removeOverflow took the deleteContents fast path (no + // footnotes in the rendered area), `removed` is null. In + // that case there are no rendered footnotes for the loop + // to iterate either, so we never actually enter this body. + // The guard is for future content where the area DOES have + // rendered footnotes but removeOverflow's pre-check changes. + let call = removed && removed.querySelector(`[data-footnote-call="${note.dataset.ref}"]`); if (call) { note.remove(); } From 9ba62c7110069c4d2d4549da82285a74cd9b7501 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 21:14:05 +0200 Subject: [PATCH 06/35] Guard afterOverflowRemoved trigger with _assertSync. 
--- docs/lib/paged.browser.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 43f9d072..75d4bca1 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1985,7 +1985,12 @@ if (breakToken && breakToken.node && extract) { let removed = this.removeOverflow(overflow, breakLetter); - this.hooks && this.hooks.afterOverflowRemoved.trigger(removed, rendered, this); + // [PATCH: assert-sync] Guard against silent async-handler + // drop. Upstream fired the trigger without `await`, so any + // async handler's work would have been lost. _assertSync + // throws instead if a handler returns a thenable -- the + // fork's per-page hot path is synchronous, see Hook.trigger. + if (this.hooks) _assertSync(this.hooks.afterOverflowRemoved.trigger(removed, rendered, this), "afterOverflowRemoved"); } } From ce62694ff8e9999491915f8dfb8a6eb65adeb705 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 21:18:32 +0200 Subject: [PATCH 07/35] Guard all sync-only Hook.trigger sites with _assertSync. 
--- docs/lib/paged.browser.js | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 75d4bca1..4dd5d187 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1549,7 +1549,7 @@ let prevBreakToken = breakToken || new BreakToken(start); - this.hooks && this.hooks.onPageLayout.trigger(wrapper, prevBreakToken, this); + if (this.hooks) _assertSync(this.hooks.onPageLayout.trigger(wrapper, prevBreakToken, this), "onPageLayout"); while (!done && !newBreakToken) { next = walker.next(); @@ -1558,7 +1558,7 @@ done = next.done; if (!node) { - this.hooks && this.hooks.layout.trigger(wrapper, this); + if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout"); let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { @@ -1569,21 +1569,21 @@ if (newBreakToken && newBreakToken.equals(prevBreakToken)) { console.warn("Unable to layout item: ", prevNode); - this.hooks && this.hooks.beforeRenderResult.trigger(undefined, wrapper, this); + if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(undefined, wrapper, this), "beforeRenderResult"); return new RenderResult(undefined, new OverflowContentError("Unable to layout item", [prevNode])); } this.rebuildTableFromBreakToken(newBreakToken, wrapper); - this.hooks && this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this); + if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this), "beforeRenderResult"); return new RenderResult(newBreakToken); } - this.hooks && this.hooks.layoutNode.trigger(node); + if (this.hooks) _assertSync(this.hooks.layoutNode.trigger(node), "layoutNode"); // Check if the rendered element has a break set if (hasRenderedContent && this.shouldBreak(node, start)) { - this.hooks && this.hooks.layout.trigger(wrapper, this); + if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout"); 
let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { @@ -1642,7 +1642,7 @@ } if (this.forceRenderBreak) { - this.hooks && this.hooks.layout.trigger(wrapper, this); + if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout"); newBreakToken = this.findBreakToken(wrapper, source, bounds, prevBreakToken); @@ -1661,7 +1661,7 @@ // Only check overflow once per maxChars of new content. if (length - lengthAtLastCheck >= this.maxChars) { - this.hooks && this.hooks.layout.trigger(wrapper, this); + if (this.hooks) _assertSync(this.hooks.layout.trigger(wrapper, this), "layout"); let imgs = wrapper.querySelectorAll("img"); if (imgs.length) { @@ -1684,7 +1684,7 @@ if (after) { newBreakToken = new BreakToken(after); } else { - this.hooks && this.hooks.beforeRenderResult.trigger(undefined, wrapper, this); + if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(undefined, wrapper, this), "beforeRenderResult"); return new RenderResult(undefined, new OverflowContentError("Unable to layout item", [node])); } } @@ -1692,7 +1692,7 @@ } - this.hooks && this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this); + if (this.hooks) _assertSync(this.hooks.beforeRenderResult.trigger(newBreakToken, wrapper, this), "beforeRenderResult"); return new RenderResult(newBreakToken); } @@ -26611,7 +26611,7 @@ csstree.walk(ast, { visit: "Url", enter: (node, item, list) => { - this.hooks.onUrl.trigger(node, item, list); + _assertSync(this.hooks.onUrl.trigger(node, item, list), "onUrl"); } }); } @@ -26623,17 +26623,17 @@ const basename = csstree.keyword(node.name).basename; if (basename === "page") { - this.hooks.onAtPage.trigger(node, item, list); + _assertSync(this.hooks.onAtPage.trigger(node, item, list), "onAtPage"); this.declarations(node, item, list); } if (basename === "media") { - this.hooks.onAtMedia.trigger(node, item, list); + _assertSync(this.hooks.onAtMedia.trigger(node, item, list), "onAtMedia"); this.declarations(node, item, list); } if 
(basename === "import") { - this.hooks.onImport.trigger(node, item, list); + _assertSync(this.hooks.onImport.trigger(node, item, list), "onImport"); this.imports(node, item, list); } } @@ -26646,7 +26646,7 @@ visit: "Rule", enter: (ruleNode, ruleItem, rulelist) => { - this.hooks.onRule.trigger(ruleNode, ruleItem, rulelist); + _assertSync(this.hooks.onRule.trigger(ruleNode, ruleItem, rulelist), "onRule"); this.declarations(ruleNode, ruleItem, rulelist); this.onSelector(ruleNode, ruleItem, rulelist); @@ -26659,13 +26659,13 @@ visit: "Declaration", enter: (declarationNode, dItem, dList) => { - this.hooks.onDeclaration.trigger(declarationNode, dItem, dList, {ruleNode, ruleItem, rulelist}); + _assertSync(this.hooks.onDeclaration.trigger(declarationNode, dItem, dList, {ruleNode, ruleItem, rulelist}), "onDeclaration"); if (declarationNode.property === "content") { csstree.walk(declarationNode, { visit: "Function", enter: (funcNode, fItem, fList) => { - this.hooks.onContent.trigger(funcNode, fItem, fList, {declarationNode, dItem, dList}, {ruleNode, ruleItem, rulelist}); + _assertSync(this.hooks.onContent.trigger(funcNode, fItem, fList, {declarationNode, dItem, dList}, {ruleNode, ruleItem, rulelist}), "onContent"); } }); } @@ -26679,13 +26679,13 @@ csstree.walk(ruleNode, { visit: "Selector", enter: (selectNode, selectItem, selectList) => { - this.hooks.onSelector.trigger(selectNode, selectItem, selectList, {ruleNode, ruleItem, rulelist}); + _assertSync(this.hooks.onSelector.trigger(selectNode, selectItem, selectList, {ruleNode, ruleItem, rulelist}), "onSelector"); if (selectNode.children.forEach(node => {if (node.type === "PseudoElementSelector") { csstree.walk(node, { visit: "PseudoElementSelector", enter: (pseudoNode, pItem, pList) => { - this.hooks.onPseudoSelector.trigger(pseudoNode, pItem, pList, {selectNode, selectItem, selectList}, {ruleNode, ruleItem, rulelist}); + _assertSync(this.hooks.onPseudoSelector.trigger(pseudoNode, pItem, pList, {selectNode, selectItem, 
selectList}, {ruleNode, ruleItem, rulelist}), "onPseudoSelector"); } }); }})); From 41cce85f485802561959fd592ae213b979844962 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 21:55:52 +0200 Subject: [PATCH 08/35] Add --no-timing flag to measure.mjs. --- perf/measure.mjs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/perf/measure.mjs b/perf/measure.mjs index 94eedcef..d729ba13 100644 --- a/perf/measure.mjs +++ b/perf/measure.mjs @@ -23,7 +23,13 @@ // node measure.mjs [path/to/book.html] [--out ] [--keep-open] // [--cpu-profile] [--cpu-sampling ] // [--detach-pages] [--instrument] [--time-hooks] -// [--incremental] [--chrome-outline] +// [--incremental] [--chrome-outline] [--no-timing] +// +// --no-timing skips the per-page timing-handler.js injection. The handler +// adds a per-page console.log relayed via CDP that costs ~2% of render +// self-time on the 1638-page book. Use when profiling for the cleanest +// possible bottom-up table; loses the per-page CSV and the first/last +// quartile summary in return. 
// // --detach-pages also injects detach-pages.js -- a Paged.Handler that // hides each completed page from the layout tree -- to test whether @@ -80,6 +86,7 @@ let instrument = false; let timeHooks = false; let incremental = false; let chromeOutline = false; +let noTiming = false; for (let i = 0; i < args.length; i++) { const a = args[i]; if (a === '--out') outArg = args[++i]; @@ -91,6 +98,7 @@ for (let i = 0; i < args.length; i++) { else if (a === '--time-hooks') timeHooks = true; else if (a === '--incremental') incremental = true; else if (a === '--chrome-outline') chromeOutline = true; + else if (a === '--no-timing') noTiming = true; else if (!inputArg) inputArg = a; else { console.error(`unknown arg: ${a}`); process.exit(2); } } @@ -110,7 +118,8 @@ const handlerPath = resolve(__dirname, 'timing-handler.js'); const detachPagesPath = resolve(__dirname, 'detach-pages.js'); const instrumentPath = resolve(__dirname, 'instrument-flush-ops.js'); const timeHooksPath = resolve(__dirname, 'time-hooks.js'); -const required = [pagedScriptPath, handlerPath]; +const required = [pagedScriptPath]; +if (!noTiming) required.push(handlerPath); if (detachPages) required.push(detachPagesPath); if (instrument) required.push(instrumentPath); if (timeHooks) required.push(timeHooksPath); @@ -183,7 +192,9 @@ try { }); await page.addScriptTag({ path: pagedScriptPath }); - await page.addScriptTag({ path: handlerPath }); + if (!noTiming) { + await page.addScriptTag({ path: handlerPath }); + } if (detachPages) { await page.addScriptTag({ path: detachPagesPath }); } @@ -338,7 +349,9 @@ try { console.log(`[harness] total ${fmtMs(totalMs)}`); // Persist results ------------------------------------------------- - const timing = await page.evaluate(() => window.__pagedTiming); + const timing = noTiming + ? 
{ pages: [], phases: {}, pageCount: null } + : await page.evaluate(() => window.__pagedTiming); const pdfPath = join(outDir, 'book.pdf'); writeFileSync(pdfPath, Buffer.from(finalPdf)); From 858a728dbb7f28da7074bebc493e8cf52bd9b13f Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 21:56:08 +0200 Subject: [PATCH 09/35] Drop unused requestIdleCallback const and ResizeObserver/UUID typeof checks. --- docs/lib/paged.browser.js | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 4dd5d187..60be8029 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -8,6 +8,12 @@ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.PagedPolyfill = factory()); })(this, (function () { 'use strict'; + // Dispatch helpers: Element has getBoundingClientRect / getClientRects + // natively; Text and other non-Element nodes don't, so they get wrapped + // in a Range. Both call sites (Layout.findOverflow's per-node, per-word, + // per-letter walkers; the isText branch in findRoute) feed in nodes the + // walker may yield as either type, so this dispatch is load-bearing, + // not backcompat. function getBoundingClientRect(element) { if (!element) { return; @@ -44,10 +50,7 @@ * @returns {string} uuid */ function UUID() { - var d = new Date().getTime(); - if (typeof performance !== "undefined" && typeof performance.now === "function") { - d += performance.now(); //use high-precision timer if available - } + var d = new Date().getTime() + performance.now(); return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) { var r = (d + Math.random() * 16) % 16 | 0; d = Math.floor(d / 16); @@ -178,8 +181,6 @@ Object.freeze(this); } - const requestIdleCallback = typeof window !== "undefined" && ("requestIdleCallback" in window ? 
window.requestIdleCallback : window.requestAnimationFrame); - function CSSValueToString(obj) { return obj.value + (obj.unit || ""); } @@ -2520,14 +2521,7 @@ } addListeners(contents) { - if (typeof ResizeObserver !== "undefined") { - this.addResizeObserver(contents); - } else { - this._checkOverflowAfterResize = this.checkOverflowAfterResize.bind(this, contents); - this.element.addEventListener("overflow", this._checkOverflowAfterResize, false); - this.element.addEventListener("underflow", this._checkOverflowAfterResize, false); - } - // TODO: fall back to mutation observer? + this.addResizeObserver(contents); this._onScroll = function () { if (this.listening) { @@ -2546,11 +2540,8 @@ removeListeners() { this.listening = false; - if (typeof ResizeObserver !== "undefined" && this.ro) { + if (this.ro) { this.ro.disconnect(); - } else if (this.element) { - this.element.removeEventListener("overflow", this._checkOverflowAfterResize, false); - this.element.removeEventListener("underflow", this._checkOverflowAfterResize, false); } this.element && this.element.removeEventListener("scroll", this._onScroll); From ac38737461c7d42806042df6bc8c6d4a2c9b599b Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:06:32 +0200 Subject: [PATCH 10/35] Drop SafeUint32Array fallback and source-map hasNativeMap branches. --- docs/lib/paged.browser.js | 41 +++++++++------------------------------ 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 60be8029..757a43d0 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -5250,11 +5250,10 @@ }; var MIN_SIZE = 16 * 1024; - var SafeUint32Array = typeof Uint32Array !== 'undefined' ? 
Uint32Array : Array; // fallback on Array when TypedArray is not supported var adoptBuffer$2 = function adoptBuffer(buffer, size) { if (buffer === null || buffer.length < size) { - return new SafeUint32Array(Math.max(size + 1024, MIN_SIZE)); + return new Uint32Array(Math.max(size + 1024, MIN_SIZE)); } return buffer; @@ -10650,10 +10649,6 @@ * http://opensource.org/licenses/BSD-3-Clause */ - var util$2 = util$3; - var has$1 = Object.prototype.hasOwnProperty; - var hasNativeMap = typeof Map !== "undefined"; - /** * A data structure which is a combination of an array and a set. Adding a new * member is O(1), testing for membership is O(1), and finding the index of an @@ -10662,7 +10657,7 @@ */ function ArraySet$1() { this._array = []; - this._set = hasNativeMap ? new Map() : Object.create(null); + this._set = new Map(); } /** @@ -10683,7 +10678,7 @@ * @returns Number */ ArraySet$1.prototype.size = function ArraySet_size() { - return hasNativeMap ? this._set.size : Object.getOwnPropertyNames(this._set).length; + return this._set.size; }; /** @@ -10692,18 +10687,13 @@ * @param String aStr */ ArraySet$1.prototype.add = function ArraySet_add(aStr, aAllowDuplicates) { - var sStr = hasNativeMap ? aStr : util$2.toSetString(aStr); - var isDuplicate = hasNativeMap ? 
this.has(aStr) : has$1.call(this._set, sStr); + var isDuplicate = this.has(aStr); var idx = this._array.length; if (!isDuplicate || aAllowDuplicates) { this._array.push(aStr); } if (!isDuplicate) { - if (hasNativeMap) { - this._set.set(aStr, idx); - } else { - this._set[sStr] = idx; - } + this._set.set(aStr, idx); } }; @@ -10713,12 +10703,7 @@ * @param String aStr */ ArraySet$1.prototype.has = function ArraySet_has(aStr) { - if (hasNativeMap) { - return this._set.has(aStr); - } else { - var sStr = util$2.toSetString(aStr); - return has$1.call(this._set, sStr); - } + return this._set.has(aStr); }; /** @@ -10727,18 +10712,10 @@ * @param String aStr */ ArraySet$1.prototype.indexOf = function ArraySet_indexOf(aStr) { - if (hasNativeMap) { - var idx = this._set.get(aStr); - if (idx >= 0) { - return idx; - } - } else { - var sStr = util$2.toSetString(aStr); - if (has$1.call(this._set, sStr)) { - return this._set[sStr]; - } + var idx = this._set.get(aStr); + if (idx >= 0) { + return idx; } - throw new Error('"' + aStr + '" is not in the set.'); }; From 64034db854d3cb87723e6503989c148975534daf Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:10:28 +0200 Subject: [PATCH 11/35] Drop es5-ext Object.assign/keys/String.contains shim machinery. 
--- docs/lib/paged.browser.js | 127 +++----------------------------------- 1 file changed, 8 insertions(+), 119 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 757a43d0..c2039495 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1012,32 +1012,6 @@ return true; }; - var isImplemented$7 = function () { - var assign = Object.assign, obj; - if (typeof assign !== "function") return false; - obj = { foo: "raz" }; - assign(obj, { bar: "dwa" }, { trzy: "trzy" }); - return obj.foo + obj.bar + obj.trzy === "razdwatrzy"; - }; - - var isImplemented$6; - var hasRequiredIsImplemented$2; - - function requireIsImplemented$2 () { - if (hasRequiredIsImplemented$2) return isImplemented$6; - hasRequiredIsImplemented$2 = 1; - - isImplemented$6 = function () { - try { - Object.keys("primitive"); - return true; - } catch (e) { - return false; - } - }; - return isImplemented$6; - } - // eslint-disable-next-line no-empty-function var noop$4 = function () {}; @@ -1045,32 +1019,6 @@ var isValue$4 = function (val) { return val !== _undefined && val !== null; }; - var shim$5; - var hasRequiredShim$5; - - function requireShim$5 () { - if (hasRequiredShim$5) return shim$5; - hasRequiredShim$5 = 1; - - var isValue = isValue$4; - - var keys = Object.keys; - - shim$5 = function (object) { return keys(isValue(object) ? Object(object) : object); }; - return shim$5; - } - - var keys; - var hasRequiredKeys; - - function requireKeys () { - if (hasRequiredKeys) return keys; - hasRequiredKeys = 1; - - keys = requireIsImplemented$2()() ? 
Object.keys : requireShim$5(); - return keys; - } - var isValue$3 = isValue$4; var validValue = function (value) { @@ -1078,39 +1026,6 @@ return value; }; - var shim$4; - var hasRequiredShim$4; - - function requireShim$4 () { - if (hasRequiredShim$4) return shim$4; - hasRequiredShim$4 = 1; - - var keys = requireKeys() - , value = validValue - , max = Math.max; - - shim$4 = function (dest, src /*, …srcn*/) { - var error, i, length = max(arguments.length, 2), assign; - dest = Object(value(dest)); - assign = function (key) { - try { - dest[key] = src[key]; - } catch (e) { - if (!error) error = e; - } - }; - for (i = 1; i < length; ++i) { - src = arguments[i]; - keys(src).forEach(assign); - } - if (error !== undefined) throw error; - return dest; - }; - return shim$4; - } - - var assign$2 = isImplemented$7() ? Object.assign : requireShim$4(); - var isValue$2 = isValue$4; var forEach$1 = Array.prototype.forEach, create$5 = Object.create; @@ -1130,35 +1045,9 @@ return result; }; - var str = "razdwatrzy"; - - var isImplemented$5 = function () { - if (typeof str.contains !== "function") return false; - return str.contains("dwa") === true && str.contains("foo") === false; - }; - - var shim$3; - var hasRequiredShim$3; - - function requireShim$3 () { - if (hasRequiredShim$3) return shim$3; - hasRequiredShim$3 = 1; - - var indexOf = String.prototype.indexOf; - - shim$3 = function (searchString /*, position*/) { - return indexOf.call(this, searchString, arguments[1]) > -1; - }; - return shim$3; - } - - var contains$1 = isImplemented$5() ? 
String.prototype.contains : requireShim$3(); - var isValue$1 = is$4 , isPlainFunction = is - , assign$1 = assign$2 - , normalizeOpts = normalizeOptions - , contains = contains$1; + , normalizeOpts = normalizeOptions; var d$1 = (d$2.exports = function (dscr, value/*, options*/) { var c, e, w, options, desc; @@ -1170,16 +1059,16 @@ options = arguments[2]; } if (isValue$1(dscr)) { - c = contains.call(dscr, "c"); - e = contains.call(dscr, "e"); - w = contains.call(dscr, "w"); + c = dscr.includes("c"); + e = dscr.includes("e"); + w = dscr.includes("w"); } else { c = w = true; e = false; } desc = { value: value, configurable: c, enumerable: e, writable: w }; - return !options ? desc : assign$1(normalizeOpts(options), desc); + return !options ? desc : Object.assign(normalizeOpts(options), desc); }); d$1.gs = function (dscr, get, set/*, options*/) { @@ -1204,15 +1093,15 @@ set = undefined; } if (isValue$1(dscr)) { - c = contains.call(dscr, "c"); - e = contains.call(dscr, "e"); + c = dscr.includes("c"); + e = dscr.includes("e"); } else { c = true; e = false; } desc = { get: get, set: set, configurable: c, enumerable: e }; - return !options ? desc : assign$1(normalizeOpts(options), desc); + return !options ? desc : Object.assign(normalizeOpts(options), desc); }; var dExports = d$2.exports; From 3084d80e1c17f710f924109ff86ca73f138d5bd0 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:21:32 +0200 Subject: [PATCH 12/35] Drop es6-symbol bundle: Symbol, Array.from, Math.sign, Number.isNaN, globalThis. 
--- docs/lib/paged.browser.js | 576 +------------------------------------- 1 file changed, 3 insertions(+), 573 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index c2039495..d3709091 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -32648,409 +32648,7 @@ UndisplayedFilter ]; - var isImplemented$4 = function () { - var from = Array.from, arr, result; - if (typeof from !== "function") return false; - arr = ["raz", "dwa"]; - result = from(arr); - return Boolean(result && result !== arr && result[1] === "dwa"); - }; - - var isImplemented$3; - var hasRequiredIsImplemented$1; - - function requireIsImplemented$1 () { - if (hasRequiredIsImplemented$1) return isImplemented$3; - hasRequiredIsImplemented$1 = 1; - - isImplemented$3 = function () { - if (typeof globalThis !== "object") return false; - if (!globalThis) return false; - return globalThis.Array === Array; - }; - return isImplemented$3; - } - - var implementation; - var hasRequiredImplementation; - - function requireImplementation () { - if (hasRequiredImplementation) return implementation; - hasRequiredImplementation = 1; - var naiveFallback = function () { - if (typeof self === "object" && self) return self; - if (typeof window === "object" && window) return window; - throw new Error("Unable to resolve global `this`"); - }; - - implementation = (function () { - if (this) return this; - - // Unexpected strict mode (may happen if e.g. 
bundled into ESM module) - - // Thanks @mathiasbynens -> https://mathiasbynens.be/notes/globalthis - // In all ES5+ engines global object inherits from Object.prototype - // (if you approached one that doesn't please report) - try { - Object.defineProperty(Object.prototype, "__global__", { - get: function () { return this; }, - configurable: true - }); - } catch (error) { - // Unfortunate case of Object.prototype being sealed (via preventExtensions, seal or freeze) - return naiveFallback(); - } - try { - // Safari case (window.__global__ is resolved with global context, but __global__ does not) - if (!__global__) return naiveFallback(); - return __global__; - } finally { - delete Object.prototype.__global__; - } - })(); - return implementation; - } - - var globalThis_1; - var hasRequiredGlobalThis; - - function requireGlobalThis () { - if (hasRequiredGlobalThis) return globalThis_1; - hasRequiredGlobalThis = 1; - - globalThis_1 = requireIsImplemented$1()() ? globalThis : requireImplementation(); - return globalThis_1; - } - - var isImplemented$2; - var hasRequiredIsImplemented; - - function requireIsImplemented () { - if (hasRequiredIsImplemented) return isImplemented$2; - hasRequiredIsImplemented = 1; - - var global = requireGlobalThis() - , validTypes = { object: true, symbol: true }; - - isImplemented$2 = function () { - var Symbol = global.Symbol; - var symbol; - if (typeof Symbol !== "function") return false; - symbol = Symbol("test symbol"); - try { String(symbol); } - catch (e) { return false; } - - // Return 'true' also for polyfills - if (!validTypes[typeof Symbol.iterator]) return false; - if (!validTypes[typeof Symbol.toPrimitive]) return false; - if (!validTypes[typeof Symbol.toStringTag]) return false; - - return true; - }; - return isImplemented$2; - } - - var isSymbol; - var hasRequiredIsSymbol; - - function requireIsSymbol () { - if (hasRequiredIsSymbol) return isSymbol; - hasRequiredIsSymbol = 1; - - isSymbol = function (value) { - if (!value) 
return false; - if (typeof value === "symbol") return true; - if (!value.constructor) return false; - if (value.constructor.name !== "Symbol") return false; - return value[value.constructor.toStringTag] === "Symbol"; - }; - return isSymbol; - } - - var validateSymbol; - var hasRequiredValidateSymbol; - - function requireValidateSymbol () { - if (hasRequiredValidateSymbol) return validateSymbol; - hasRequiredValidateSymbol = 1; - - var isSymbol = requireIsSymbol(); - - validateSymbol = function (value) { - if (!isSymbol(value)) throw new TypeError(value + " is not a symbol"); - return value; - }; - return validateSymbol; - } - - var generateName; - var hasRequiredGenerateName; - - function requireGenerateName () { - if (hasRequiredGenerateName) return generateName; - hasRequiredGenerateName = 1; - - var d = dExports; - - var create = Object.create, defineProperty = Object.defineProperty, objPrototype = Object.prototype; - - var created = create(null); - generateName = function (desc) { - var postfix = 0, name, ie11BugWorkaround; - while (created[desc + (postfix || "")]) ++postfix; - desc += postfix || ""; - created[desc] = true; - name = "@@" + desc; - defineProperty( - objPrototype, - name, - d.gs(null, function (value) { - // For IE11 issue see: - // https://connect.microsoft.com/IE/feedbackdetail/view/1928508/ - // ie11-broken-getters-on-dom-objects - // https://github.com/medikoo/es6-symbol/issues/12 - if (ie11BugWorkaround) return; - ie11BugWorkaround = true; - defineProperty(this, name, d(value)); - ie11BugWorkaround = false; - }) - ); - return name; - }; - return generateName; - } - - var standardSymbols; - var hasRequiredStandardSymbols; - - function requireStandardSymbols () { - if (hasRequiredStandardSymbols) return standardSymbols; - hasRequiredStandardSymbols = 1; - - var d = dExports - , NativeSymbol = requireGlobalThis().Symbol; - - standardSymbols = function (SymbolPolyfill) { - return Object.defineProperties(SymbolPolyfill, { - // To ensure proper 
interoperability with other native functions (e.g. Array.from) - // fallback to eventual native implementation of given symbol - hasInstance: d( - "", (NativeSymbol && NativeSymbol.hasInstance) || SymbolPolyfill("hasInstance") - ), - isConcatSpreadable: d( - "", - (NativeSymbol && NativeSymbol.isConcatSpreadable) || - SymbolPolyfill("isConcatSpreadable") - ), - iterator: d("", (NativeSymbol && NativeSymbol.iterator) || SymbolPolyfill("iterator")), - match: d("", (NativeSymbol && NativeSymbol.match) || SymbolPolyfill("match")), - replace: d("", (NativeSymbol && NativeSymbol.replace) || SymbolPolyfill("replace")), - search: d("", (NativeSymbol && NativeSymbol.search) || SymbolPolyfill("search")), - species: d("", (NativeSymbol && NativeSymbol.species) || SymbolPolyfill("species")), - split: d("", (NativeSymbol && NativeSymbol.split) || SymbolPolyfill("split")), - toPrimitive: d( - "", (NativeSymbol && NativeSymbol.toPrimitive) || SymbolPolyfill("toPrimitive") - ), - toStringTag: d( - "", (NativeSymbol && NativeSymbol.toStringTag) || SymbolPolyfill("toStringTag") - ), - unscopables: d( - "", (NativeSymbol && NativeSymbol.unscopables) || SymbolPolyfill("unscopables") - ) - }); - }; - return standardSymbols; - } - - var symbolRegistry; - var hasRequiredSymbolRegistry; - - function requireSymbolRegistry () { - if (hasRequiredSymbolRegistry) return symbolRegistry; - hasRequiredSymbolRegistry = 1; - - var d = dExports - , validateSymbol = requireValidateSymbol(); - - var registry = Object.create(null); - - symbolRegistry = function (SymbolPolyfill) { - return Object.defineProperties(SymbolPolyfill, { - for: d(function (key) { - if (registry[key]) return registry[key]; - return (registry[key] = SymbolPolyfill(String(key))); - }), - keyFor: d(function (symbol) { - var key; - validateSymbol(symbol); - for (key in registry) { - if (registry[key] === symbol) return key; - } - return undefined; - }) - }); - }; - return symbolRegistry; - } - - var polyfill; - var 
hasRequiredPolyfill; - - function requirePolyfill () { - if (hasRequiredPolyfill) return polyfill; - hasRequiredPolyfill = 1; - - var d = dExports - , validateSymbol = requireValidateSymbol() - , NativeSymbol = requireGlobalThis().Symbol - , generateName = requireGenerateName() - , setupStandardSymbols = requireStandardSymbols() - , setupSymbolRegistry = requireSymbolRegistry(); - - var create = Object.create - , defineProperties = Object.defineProperties - , defineProperty = Object.defineProperty; - - var SymbolPolyfill, HiddenSymbol, isNativeSafe; - - if (typeof NativeSymbol === "function") { - try { - String(NativeSymbol()); - isNativeSafe = true; - } catch (ignore) {} - } else { - NativeSymbol = null; - } - - // Internal constructor (not one exposed) for creating Symbol instances. - // This one is used to ensure that `someSymbol instanceof Symbol` always return false - HiddenSymbol = function Symbol(description) { - if (this instanceof HiddenSymbol) throw new TypeError("Symbol is not a constructor"); - return SymbolPolyfill(description); - }; - - // Exposed `Symbol` constructor - // (returns instances of HiddenSymbol) - polyfill = SymbolPolyfill = function Symbol(description) { - var symbol; - if (this instanceof Symbol) throw new TypeError("Symbol is not a constructor"); - if (isNativeSafe) return NativeSymbol(description); - symbol = create(HiddenSymbol.prototype); - description = description === undefined ? 
"" : String(description); - return defineProperties(symbol, { - __description__: d("", description), - __name__: d("", generateName(description)) - }); - }; - - setupStandardSymbols(SymbolPolyfill); - setupSymbolRegistry(SymbolPolyfill); - - // Internal tweaks for real symbol producer - defineProperties(HiddenSymbol.prototype, { - constructor: d(SymbolPolyfill), - toString: d("", function () { return this.__name__; }) - }); - - // Proper implementation of methods exposed on Symbol.prototype - // They won't be accessible on produced symbol instances as they derive from HiddenSymbol.prototype - defineProperties(SymbolPolyfill.prototype, { - toString: d(function () { return "Symbol (" + validateSymbol(this).__description__ + ")"; }), - valueOf: d(function () { return validateSymbol(this); }) - }); - defineProperty( - SymbolPolyfill.prototype, - SymbolPolyfill.toPrimitive, - d("", function () { - var symbol = validateSymbol(this); - if (typeof symbol === "symbol") return symbol; - return symbol.toString(); - }) - ); - defineProperty(SymbolPolyfill.prototype, SymbolPolyfill.toStringTag, d("c", "Symbol")); - - // Proper implementaton of toPrimitive and toStringTag for returned symbol instances - defineProperty( - HiddenSymbol.prototype, SymbolPolyfill.toStringTag, - d("c", SymbolPolyfill.prototype[SymbolPolyfill.toStringTag]) - ); - - // Note: It's important to define `toPrimitive` as last one, as some implementations - // implement `toPrimitive` natively without implementing `toStringTag` (or other specified symbols) - // And that may invoke error in definition flow: - // See: https://github.com/medikoo/es6-symbol/issues/13#issuecomment-164146149 - defineProperty( - HiddenSymbol.prototype, SymbolPolyfill.toPrimitive, - d("c", SymbolPolyfill.prototype[SymbolPolyfill.toPrimitive]) - ); - return polyfill; - } - - var es6Symbol; - var hasRequiredEs6Symbol; - - function requireEs6Symbol () { - if (hasRequiredEs6Symbol) return es6Symbol; - hasRequiredEs6Symbol = 1; - - 
es6Symbol = requireIsImplemented()() - ? requireGlobalThis().Symbol - : requirePolyfill(); - return es6Symbol; - } - - var isArguments; - var hasRequiredIsArguments; - - function requireIsArguments () { - if (hasRequiredIsArguments) return isArguments; - hasRequiredIsArguments = 1; - - var objToString = Object.prototype.toString - , id = objToString.call((function () { return arguments; })()); - - isArguments = function (value) { return objToString.call(value) === id; }; - return isArguments; - } - - var isFunction; - var hasRequiredIsFunction; - - function requireIsFunction () { - if (hasRequiredIsFunction) return isFunction; - hasRequiredIsFunction = 1; - - var objToString = Object.prototype.toString - , isFunctionStringTag = RegExp.prototype.test.bind(/^[object [A-Za-z0-9]*Function]$/); - - isFunction = function (value) { - return typeof value === "function" && isFunctionStringTag(objToString.call(value)); - }; - return isFunction; - } - - var isImplemented$1 = function () { - var sign = Math.sign; - if (typeof sign !== "function") return false; - return sign(10) === 1 && sign(-20) === -1; - }; - - var shim$2; - var hasRequiredShim$2; - - function requireShim$2 () { - if (hasRequiredShim$2) return shim$2; - hasRequiredShim$2 = 1; - - shim$2 = function (value) { - value = Number(value); - if (isNaN(value) || value === 0) return value; - return value > 0 ? 1 : -1; - }; - return shim$2; - } - - var sign$1 = isImplemented$1() ? 
Math.sign : requireShim$2(); + var sign$1 = Math.sign; var sign = sign$1 , abs$1 = Math.abs @@ -33068,177 +32666,9 @@ var toPosInteger = function (value) { return max(0, toInteger(value)); }; - var isString; - var hasRequiredIsString; - - function requireIsString () { - if (hasRequiredIsString) return isString; - hasRequiredIsString = 1; - - var objToString = Object.prototype.toString, id = objToString.call(""); - - isString = function (value) { - return ( - typeof value === "string" || - (value && - typeof value === "object" && - (value instanceof String || objToString.call(value) === id)) || - false - ); - }; - return isString; - } - - var shim$1; - var hasRequiredShim$1; - - function requireShim$1 () { - if (hasRequiredShim$1) return shim$1; - hasRequiredShim$1 = 1; - - var iteratorSymbol = requireEs6Symbol().iterator - , isArguments = requireIsArguments() - , isFunction = requireIsFunction() - , toPosInt = toPosInteger - , callable = validCallable - , validValue$1 = validValue - , isValue = isValue$4 - , isString = requireIsString() - , isArray = Array.isArray - , call = Function.prototype.call - , desc = { configurable: true, enumerable: true, writable: true, value: null } - , defineProperty = Object.defineProperty; - - // eslint-disable-next-line complexity, max-lines-per-function - shim$1 = function (arrayLike /*, mapFn, thisArg*/) { - var mapFn = arguments[1] - , thisArg = arguments[2] - , Context - , i - , j - , arr - , length - , code - , iterator - , result - , getIterator - , value; - - arrayLike = Object(validValue$1(arrayLike)); - - if (isValue(mapFn)) callable(mapFn); - if (!this || this === Array || !isFunction(this)) { - // Result: Plain array - if (!mapFn) { - if (isArguments(arrayLike)) { - // Source: Arguments - length = arrayLike.length; - if (length !== 1) return Array.apply(null, arrayLike); - arr = new Array(1); - arr[0] = arrayLike[0]; - return arr; - } - if (isArray(arrayLike)) { - // Source: Array - arr = new Array((length = 
arrayLike.length)); - for (i = 0; i < length; ++i) arr[i] = arrayLike[i]; - return arr; - } - } - arr = []; - } else { - // Result: Non plain array - Context = this; - } - - if (!isArray(arrayLike)) { - if ((getIterator = arrayLike[iteratorSymbol]) !== undefined) { - // Source: Iterator - iterator = callable(getIterator).call(arrayLike); - if (Context) arr = new Context(); - result = iterator.next(); - i = 0; - while (!result.done) { - value = mapFn ? call.call(mapFn, thisArg, result.value, i) : result.value; - if (Context) { - desc.value = value; - defineProperty(arr, i, desc); - } else { - arr[i] = value; - } - result = iterator.next(); - ++i; - } - length = i; - } else if (isString(arrayLike)) { - // Source: String - length = arrayLike.length; - if (Context) arr = new Context(); - for (i = 0, j = 0; i < length; ++i) { - value = arrayLike[i]; - if (i + 1 < length) { - code = value.charCodeAt(0); - // eslint-disable-next-line max-depth - if (code >= 0xd800 && code <= 0xdbff) value += arrayLike[++i]; - } - value = mapFn ? call.call(mapFn, thisArg, value, j) : value; - if (Context) { - desc.value = value; - defineProperty(arr, j, desc); - } else { - arr[j] = value; - } - ++j; - } - length = j; - } - } - if (length === undefined) { - // Source: array or array-like - length = toPosInt(arrayLike.length); - if (Context) arr = new Context(length); - for (i = 0; i < length; ++i) { - value = mapFn ? call.call(mapFn, thisArg, arrayLike[i], i) : arrayLike[i]; - if (Context) { - desc.value = value; - defineProperty(arr, i, desc); - } else { - arr[i] = value; - } - } - } - if (Context) { - desc.value = null; - arr.length = length; - } - return arr; - }; - return shim$1; - } - - var from = isImplemented$4() ? 
Array.from : requireShim$1(); - - var isImplemented = function () { - var numberIsNaN = Number.isNaN; - if (typeof numberIsNaN !== "function") return false; - return !numberIsNaN({}) && numberIsNaN(NaN) && !numberIsNaN(34); - }; - - var shim; - var hasRequiredShim; - - function requireShim () { - if (hasRequiredShim) return shim; - hasRequiredShim = 1; - - shim = function (value) { - // eslint-disable-next-line no-self-compare - return value !== value; - }; - return shim; - } + var from = Array.from; - var isNan = isImplemented() ? Number.isNaN : requireShim(); + var isNan = Number.isNaN; var numberIsNaN = isNan , toPosInt = toPosInteger From e8da0f85a51b1a20d5e5d3a191428e2b3d2ce2d1 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:32:48 +0200 Subject: [PATCH 13/35] Collapse leftover polyfill aliases (sign, from, isNan, toInteger$1, isObject$1, remove$1). --- docs/lib/paged.browser.js | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index d3709091..2c7c5db4 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -32648,29 +32648,21 @@ UndisplayedFilter ]; - var sign$1 = Math.sign; - - var sign = sign$1 - , abs$1 = Math.abs + var abs$1 = Math.abs , floor$1 = Math.floor; - var toInteger$1 = function (value) { + var toInteger = function (value) { if (isNaN(value)) return 0; value = Number(value); if (value === 0 || !isFinite(value)) return value; - return sign(value) * floor$1(abs$1(value)); + return Math.sign(value) * floor$1(abs$1(value)); }; - var toInteger = toInteger$1 - , max = Math.max; + var max = Math.max; var toPosInteger = function (value) { return max(0, toInteger(value)); }; - var from = Array.from; - - var isNan = Number.isNaN; - - var numberIsNaN = isNan + var numberIsNaN = Number.isNaN , toPosInt = toPosInteger , value$1 = validValue , indexOf$1 = Array.prototype.indexOf @@ -32702,7 +32694,7 @@ , splice = 
Array.prototype.splice; // eslint-disable-next-line no-unused-vars - var remove$1 = function (itemToRemove /*, …item*/) { + var remove = function (itemToRemove /*, …item*/) { forEach.call( arguments, function (item) { @@ -32717,17 +32709,14 @@ var map = { function: true, object: true }; - var isObject$1 = function (value) { return (isValue(value) && map[typeof value]) || false; }; - - var isObject = isObject$1; + var isObject = function (value) { return (isValue(value) && map[typeof value]) || false; }; var validObject = function (value) { if (!isObject(value)) throw new TypeError(value + " is not an Object"); return value; }; - var aFrom = from - , remove = remove$1 + var aFrom = Array.from , value = validObject , d = dExports , emit = eventEmitterExports.methods.emit From 9474784c7847f01d94fe4b0f48ad3667ce71f655 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:41:08 +0200 Subject: [PATCH 14/35] Replace UUID() with a base36 counter (~1s render saving, 1.5MB less string memory). --- docs/lib/paged.browser.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 2c7c5db4..1dafb04c 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -45,17 +45,17 @@ } /** - * Generates a UUID - * based on: http://stackoverflow.com/questions/105034/how-to-create-a-guid-uuid-in-javascript - * @returns {string} uuid + * Returns a unique-within-render id as a base36 string. + * Replaced the prior RFC 4122 v4 UUID generator -- our pipeline only + * needs uniqueness within a single render (data-ref attributes, + * generated CSS variable / selector names, internal object identity), + * not globally. Counter + base36 keeps IDs short (max ~5 chars for + * the ~50k DOM nodes in the book) and shaves the per-call cost from + * ~3us (Date.now + per-char replace closure) to ~50ns. 
*/ + var __pagedjsCounter = 0; function UUID() { - var d = new Date().getTime() + performance.now(); - return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function (c) { - var r = (d + Math.random() * 16) % 16 | 0; - d = Math.floor(d / 16); - return (c === "x" ? r : (r & 0x3 | 0x8)).toString(16); - }); + return (++__pagedjsCounter).toString(36); } function attr(element, attributes) { From e29c72434e8cf02a8226d1ddd488067fab02fd36 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:47:15 +0200 Subject: [PATCH 15/35] Inline getBoundingClientRect/getClientRects wrappers and delete them. Structural cleanup. Removes one function frame from every overflow-walker iteration. --- docs/lib/paged.browser.js | 53 ++++++++++----------------------------- 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 1dafb04c..3afccbaf 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -8,42 +8,6 @@ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.PagedPolyfill = factory()); })(this, (function () { 'use strict'; - // Dispatch helpers: Element has getBoundingClientRect / getClientRects - // natively; Text and other non-Element nodes don't, so they get wrapped - // in a Range. Both call sites (Layout.findOverflow's per-node, per-word, - // per-letter walkers; the isText branch in findRoute) feed in nodes the - // walker may yield as either type, so this dispatch is load-bearing, - // not backcompat. 
- function getBoundingClientRect(element) { - if (!element) { - return; - } - let rect; - if (typeof element.getBoundingClientRect !== "undefined") { - rect = element.getBoundingClientRect(); - } else { - let range = document.createRange(); - range.selectNode(element); - rect = range.getBoundingClientRect(); - } - return rect; - } - - function getClientRects(element) { - if (!element) { - return; - } - let rect; - if (typeof element.getClientRects !== "undefined") { - rect = element.getClientRects(); - } else { - let range = document.createRange(); - range.selectNode(element); - rect = range.getClientRects(); - } - return rect; - } - /** * Returns a unique-within-render id as a base36 string. * Replaced the prior RFC 4122 v4 UUID generator -- our pipeline only @@ -1919,7 +1883,14 @@ br = undefined; if (node) { - let pos = getBoundingClientRect(node); + let pos; + if (node.nodeType === 1) { + pos = node.getBoundingClientRect(); + } else { + let range = document.createRange(); + range.selectNode(node); + pos = range.getBoundingClientRect(); + } let left = Math.round(pos.left); let right = Math.floor(pos.right); let top = Math.round(pos.top); @@ -2016,7 +1987,9 @@ node.textContent.trim().length && !breakInsideAvoidParentNode(node.parentNode)) { - let rects = getClientRects(node); + let textRange = document.createRange(); + textRange.selectNode(node); + let rects = textRange.getClientRects(); let rect; left = 0; top = 0; @@ -2123,7 +2096,7 @@ break; } - pos = getBoundingClientRect(word); + pos = word.getBoundingClientRect(); left = Math.floor(pos.left); right = Math.floor(pos.right); @@ -2148,7 +2121,7 @@ break; } - pos = getBoundingClientRect(letter); + pos = letter.getBoundingClientRect(); left = Math.floor(pos.left); top = Math.floor(pos.top); From 5ac609d474a110444bbcbc0cabca0a57d24c5365 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 22:59:34 +0200 Subject: [PATCH 16/35] Cheaper BreakToken.toJSON: skip indexOf scan + JSON.stringify 
(~64ms saved). --- docs/lib/paged.browser.js | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 3afccbaf..33f12e61 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -868,28 +868,21 @@ return true; } + // Cheap loop-detection key for the chunker's per-page Set. + // Common case (Element with its own data-ref): "ref|offset". Falls + // back to "parentRef|siblingIndex|offset" for Text/Comment nodes + // where the ref lives on the parent. The string is opaque to all + // callers other than chunker.flow's loop guard, so the format only + // needs to be unique-per-break-point, not human-readable. toJSON(hash) { - let node; - let index = 0; - if (!this.node) { - return {}; - } + if (!this.node) return ""; if (isElement(this.node) && this.node.dataset.ref) { - node = this.node.dataset.ref; - } else if (hash) { - node = this.node.parentElement.dataset.ref; - } - - if (this.node.parentElement) { - const children = Array.from(this.node.parentElement.childNodes); - index = children.indexOf(this.node); + return this.node.dataset.ref + "|" + (this.offset || 0); } - - return JSON.stringify({ - "node": node, - "index" : index, - "offset": this.offset - }); + const parent = this.node.parentElement; + const parentRef = parent ? parent.dataset.ref : ""; + const index = parent ? Array.prototype.indexOf.call(parent.childNodes, this.node) : 0; + return parentRef + "|" + index + "|" + (this.offset || 0); } } From 7898bc82f5b17a9bc24540a8e0ad726c49486a7a Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 23:32:05 +0200 Subject: [PATCH 17/35] Fix Footnotes.renderNode always-truthy NodeList condition. 
--- docs/lib/paged.browser.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 33f12e61..7e84f353 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -31219,7 +31219,12 @@ if (node.dataset.note === "footnote") { notes = [node]; - } else if (node.dataset.hasNotes || node.querySelectorAll("[data-note='footnote']")) { + } else if (node.dataset.hasNotes) { + // Upstream wrote `|| node.querySelectorAll(...)` here, but a + // NodeList is always truthy (even empty), so the right arm + // of the || always ran and the next line ran querySelectorAll + // again -- two subtree scans per element-node clone for any + // document that doesn't use data-note='footnote'. notes = node.querySelectorAll("[data-note='footnote']"); } From a5880f172785f04e38643a40454ff2316b1cc6d0 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 23:32:50 +0200 Subject: [PATCH 18/35] Hook.triggerSync empty-handlers fast-path (~490ms saved). --- docs/lib/paged.browser.js | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 7e84f353..10e49dd2 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1293,11 +1293,25 @@ } /** - * Triggers a hook to run all functions synchronously - * @example this.content.trigger(args).then(function(){...}); - * @return {Array} results + * Triggers a hook to run all functions synchronously. + * @return {Array|undefined} results array, or undefined when no + * handlers are registered (callers can skip their reducer + * forEach with a simple truthy check). + * + * [PATCH: hook-fast-path-sync] Mirrors the async `trigger()` patch: + * skip the results-array alloc and the empty-forEach indirection + * when this.hooks is empty. 
In the per-page hot path, onOverflow + * and onBreakToken have zero registered handlers in this build, + * so every call to those two hooks via triggerSync was pure + * dispatch overhead -- ~3300 calls per render on the 1650-page + * book. Callers in those reducer sites now read: + * + * let r = hook.triggerSync(...); + * if (r) r.forEach(...); */ triggerSync(){ + if (this.hooks.length === 0) return undefined; + var args = arguments; var context = this.context; var results = []; @@ -1549,7 +1563,7 @@ offset ); let breakHooks = this.hooks.onBreakToken.triggerSync(newBreakToken, undefined, node, this); - breakHooks.forEach((newToken) => { + if (breakHooks) breakHooks.forEach((newToken) => { if (typeof newToken != "undefined") { newBreakToken = newToken; } @@ -1638,7 +1652,7 @@ } let nodeHooks = this.hooks.renderNode.triggerSync(clone, node, this); - nodeHooks.forEach((newNode) => { + if (nodeHooks) nodeHooks.forEach((newNode) => { if (typeof newNode != "undefined") { clone = newNode; } @@ -1803,7 +1817,7 @@ let breakToken, breakLetter; let overflowHooks = this.hooks.onOverflow.triggerSync(overflow, rendered, bounds, this); - overflowHooks.forEach((newOverflow) => { + if (overflowHooks) overflowHooks.forEach((newOverflow) => { if (typeof newOverflow != "undefined") { overflow = newOverflow; } @@ -1813,7 +1827,7 @@ breakToken = this.createBreakToken(overflow, rendered, source); // breakToken is nullable let breakHooks = this.hooks.onBreakToken.triggerSync(breakToken, overflow, rendered, this); - breakHooks.forEach((newToken) => { + if (breakHooks) breakHooks.forEach((newToken) => { if (typeof newToken != "undefined") { breakToken = newToken; } From 80c3b4f4ed8200aaee3f543b0c0daebb212f7996 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Thu, 21 May 2026 23:54:19 +0200 Subject: [PATCH 19/35] Footnotes unregisters its hooks when no footnotes in source (~370ms saved). 
--- docs/lib/paged.browser.js | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 10e49dd2..57f89ace 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -27534,13 +27534,41 @@ this.polisher = polisher; this.caller = caller; + // [PATCH: handler-self-disable] Track each (hook, bound) pair we + // register so handlers that find nothing to do for a given render + // can splice themselves back out. Footnotes uses this to disappear + // when the document and CSS produced no footnote-marked nodes; + // combined with Hook.trigger/triggerSync's empty-handlers fast + // path, the per-page and per-node dispatches then short-circuit. + this._registered = {}; + for (let name in hooks) { if (name in this) { let hook = hooks[name]; - hook.register(this[name].bind(this)); + let bound = this[name].bind(this); + this._registered[name] = { hook, bound }; + hook.register(bound); } } } + + /** + * Remove this handler's registered callbacks from every hook it + * subscribed to. Pass the name of the hook the caller is currently + * inside (e.g. `"afterParsed"`) to skip its own entry -- splicing + * the array we're iterating would cause the surrounding `trigger()` + * loop to skip a sibling handler. The skipped entry is harmless on + * one-shot hooks; on recurring hooks the caller can re-call later. 
+ */ + _unregisterAll(except) { + for (const name in this._registered) { + if (name === except) continue; + const { hook, bound } = this._registered[name]; + const idx = hook.hooks.indexOf(bound); + if (idx >= 0) hook.hooks.splice(idx, 1); + delete this._registered[name]; + } + } } EventEmitter(Handler.prototype); @@ -31178,6 +31206,18 @@ afterParsed(parsed) { this.processFootnotes(parsed, this.footnotes); + + // [PATCH: footnotes-self-disable] If neither source HTML nor CSS + // `float: footnote` rules produced any footnote-marked nodes, the + // remaining hooks (renderNode per element-node, afterPageLayout + + // beforePageLayout + afterOverflowRemoved per page) have nothing + // to do for the rest of this render. Unregister them so the + // empty-handlers fast-path in Hook.triggerSync short-circuits. + // afterParsed itself is skipped via `except` -- it's a one-shot + // and the surrounding trigger() loop is still iterating it. + if (!parsed.querySelector("[data-note='footnote']")) { + this._unregisterAll("afterParsed"); + } } processFootnotes(parsed, notes) { From 08289a12f2c38ec588aab9b1c7385f87dfab59fb Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 22 May 2026 00:05:04 +0200 Subject: [PATCH 20/35] perf/README: document the append() investigation and three landings. --- perf/README.md | 315 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) diff --git a/perf/README.md b/perf/README.md index 398c744b..75133ccf 100644 --- a/perf/README.md +++ b/perf/README.md @@ -3084,3 +3084,318 @@ The hot per-page path is now `function`, `function*`, plain return values, and a `while` loop. Future work that touches this code can reason about it as straight-line synchronous flow. + +## Doing less work in `Layout.append()` + +Picking the next hotspot after the async cleanup, BreakToken +JSON, gBCR wrapper inline, and UUID-counter changes had all +landed. 
Fresh profile from a clean baseline at 100us sampling +(V8 effectively clamped this to ~543us/sample on this Node/ +Chromium build), `--no-timing --detach-pages`, render-only: + +``` + self_ms self_% function @ source + ------- ------ -------------------------------------------------- + 4825.28 38.22% getBoundingClientRect (native) + 2021.89 16.02% (program) (native) + 1954.01 15.48% removeChild (native) + 635.95 5.04% removeOverflow paged.browser.js + 288.38 2.28% wrapContent paged.browser.js + 255.25 2.02% insertBefore (native) + 227.01 1.80% appendChild (native) + 164.01 1.30% findOverflow paged.browser.js + 140.66 1.11% (garbage collector) (native) + 138.49 1.10% afterPageLayout paged.browser.js (Splits) + 129.25 1.02% cloneNode (native) + 125.99 1.00% addRefs paged.browser.js + 90.15 0.71% renderTo paged.browser.js + 81.46 0.65% filterTree paged.browser.js + 80.92 0.64% importNode (native) + 80.38 0.64% setAttribute (native) + 72.77 0.58% append paged.browser.js + ... +``` + +The four heavy hitters are unchanged from earlier reports. +`Layout.append` itself shows only 73 ms of self-time, but +inclusively it owns a large fraction of the per-source-node +work: `cloneNode`, `appendChild`/`insertBefore`, the +`findElement` chain (`querySelector` + `getAttribute`), the +`renderNode` hook dispatch, and `rebuildAncestors` at page +boundaries all flow through it. With ~100k+ source-node +clones per render, anything per-call adds up. + +Reading the body of `append()`, three things stood out as +potentially-reducible: + +1. The `renderNode` hook dispatch fires for every cloned + node. Even if no handler is registered, `triggerSync` + still allocates a results array, runs `this.hooks.forEach` + over zero entries, and returns the empty array; the + caller then runs its own `.forEach` over that empty array. +2. The `findElement(node.parentNode, dest)` lookup goes + through `getAttribute("data-ref")` on the parent. 
The + ref is also set on every source element at decoration + time, so the value could be stashed on a plain JS expando. +3. `clone.dataset.ref` is read a second time at the end of + `append()` to register the clone in `dest.indexOfRefs`. + Same expando trick applies. + +Following the (1) thread first uncovered two separable wins: +a bug inside the only registered `renderNode` handler, and +the broader empty-handlers dispatch overhead. + +### `Footnotes.renderNode`: always-truthy NodeList condition + +The grep for `renderNode` method definitions in the bundle +returns exactly one match: `Footnotes.renderNode` (in the +package's footnotes-handling class). Every `append()` call +goes through it. Its body: + +```js +renderNode(node) { + if (node.nodeType == 1) { + let notes; + if (!node.dataset) return; + + if (node.dataset.note === "footnote") { + notes = [node]; + } else if (node.dataset.hasNotes || + node.querySelectorAll("[data-note='footnote']")) { + notes = node.querySelectorAll("[data-note='footnote']"); + } + + if (notes && notes.length) { + this.findVisibleFootnotes(notes, node); + } + } +} +``` + +The `else if` condition has an upstream bug: a `NodeList` is +always truthy (even an empty one -- it's an object), so when +`dataset.hasNotes` is undefined the right arm of the `||` +runs `querySelectorAll`, the condition evaluates true, and +the next line then runs `querySelectorAll` **a second time**. +Two subtree scans per element-node clone, for any document +that doesn't author `data-note='footnote'` directly. + +`grep -c 'data-note' docs/_site-pdf/book.html` returns 0 -- +every one of those scans on every clone of every page of +the book was dead work. 
+ +The fix narrows the `else if` to the original intent: + +```js +} else if (node.dataset.hasNotes) { + notes = node.querySelectorAll("[data-note='footnote']"); +} +``` + +Profile delta (post-tojson baseline vs surgical fix): + +| metric | baseline | post-fix | Δ | +| ------ | -------- | -------- | --- | +| render wall | 12.63 s | 12.63 s | flat (within noise) | +| `querySelectorAll` self | 67.9 ms | 52.8 ms | -15 ms | +| samples | 23,313 | 23,250 | -63 | + +A small saving in absolute terms: most of the eliminated +`querySelectorAll` calls were against tiny leaf subtrees +that terminate in microseconds when no matches are present. +The bug fix is upstream-clean and correct; the perf-relevant +takeaway was that *most* of the work `append()` pays for the +`renderNode` hook is in the dispatch wrapping the handler, +not in the handler's body. That motivated the second win, below. + +### `Hook.triggerSync` empty-handlers fast-path + +Mirrors the README's earlier "Phase 1: hook fast-path" for +the async `trigger()` path.
`Hook.triggerSync` previously: + +```js +triggerSync() { + var args = arguments; + var context = this.context; + var results = []; + this.hooks.forEach(function (task) { + var executing = task.apply(context, args); + results.push(executing); + }); + return results; +} +``` + +…and the four reducer call sites in `Layout` always did: + +```js +let r = this.hooks.X.triggerSync(...); +r.forEach((newVal) => { if (newVal !== undefined) target = newVal; }); +``` + +Walking the bundle to see which of those four hook arrays +are actually populated in our build: + +| call site | hook | handlers registered | +| --------- | ---- | ------------------- | +| `breakAt` (line 1551) | `onBreakToken` | 0 | +| `append` (line 1640) | `renderNode` | 1 (`Footnotes`) | +| `findBreakToken` (line 1805) | `onOverflow` | 0 | +| `findBreakToken` (line 1815) | `onBreakToken` | 0 | +| `Chunker.flow` (line 2910) | `filter` | 4 | + +Three of the four hot sites are dispatching against an empty +handler array every call. `onOverflow` and the two +`onBreakToken` sites all fire from the per-page break- +detection path, which can run more than once per page when +overflow-and-retry happens. + +Patch: `triggerSync` returns `undefined` on the empty path, +callers guard their reducer `forEach` with a truthy check. + +```js +triggerSync() { + if (this.hooks.length === 0) return undefined; + // ...existing body +} +``` + +```js +let r = this.hooks.X.triggerSync(...); +if (r) r.forEach((newVal) => { ... 
}); +``` + +Profile delta (post-surgical vs post-fast-path): + +| metric | post-surgical | post-fast-path | Δ | +| ------ | ------------- | -------------- | --- | +| render wall | 12.63 s | **12.14 s** | **-0.49 s** | +| samples | 23,250 | 22,433 | -817 | +| `getBoundingClientRect` self | 4,819 ms | 4,714 ms | -105 ms | +| `removeChild` self | 1,962 ms | 1,902 ms | -60 ms | +| `removeOverflow` self | 634 ms | 552 ms | -82 ms | +| `querySelectorAll` self | 52.8 ms | 43.4 ms | -10 ms | + +The wall-clock drop (~490 ms) and sample drop (817 × 542 us +≈ 443 ms) line up cleanly, so the saving is real, not run- +to-run noise. The reductions spread across rows because the +per-call cost of an empty `triggerSync` -- an array alloc, a +forEach over zero entries, a return, and the caller's own +forEach over the returned `[]` -- creates pressure on the +allocator and the V8 inliner that compounds on the per-page +hot path even though no single line attributes the cost. + +The `renderNode` site at line 1640 does **not** hit the fast +path in this build -- `Footnotes` still occupies it with one +handler, so `hooks.length === 1` and the body runs as +before. The savings come entirely from the three zero- +handler sites. + +### `Footnotes` self-disables when no footnotes are in source + +That left the per-element `Footnotes.renderNode` dispatch +still firing on every cloned node, plus four other hook +methods `Footnotes` registers via the `Handler` base auto- +wiring. Inventory of what `Footnotes` is doing on a render +with zero footnote-marked nodes: + +| method | fires | what it does on a footnote-free doc | +| ------ | ----- | ----------------------------------- | +| `onDeclaration` | per CSS declaration | quick property-name checks. Cheap. | +| `renderNode` | per element-node clone | short-circuits after surgical fix. | +| `beforePageLayout` | once per page | checks `this.needsLayout.length` (always 0). Cheap. 
| +| `afterPageLayout` | once per page | **3 `querySelector`s + `getBoundingClientRect` + `new Layout(...)` (which does 2 more `getBoundingClientRect`s + `getComputedStyle` in its constructor) + `findOverflow()` on the footnote-inner-content area.** Real work. | +| `afterOverflowRemoved` | per overflow detection | `querySelectorAll` returning empty. Cheap-ish. | + +The big hidden cost was `afterPageLayout` -- ~1,650 calls per +render, each measuring an empty footnote area through several +DOM ops and constructing a transient `Layout` instance whose +constructor itself does multiple gBCRs. + +The detect-and-disable plan: + +1. Footnotes is the *only* registrant for each of its hook + methods (`onDeclaration` aside -- it's a polisher-time + hook with other registrants, but it's also cheap). +2. By the time `afterParsed` fires, both the CSS-driven + selectors (populated by `onDeclaration` calls into + `this.footnotes`) and any source-HTML `data-note` markers + are accounted for. `Footnotes.afterParsed` already runs + `processFootnotes(parsed, this.footnotes)` which writes + `data-note='footnote'` on any element matching a CSS + selector. So a single `parsed.querySelector( + "[data-note='footnote']")` at the end of that pass is + conclusive. +3. If null, splice `Footnotes`'s bound functions back out + of each hook array. With the empty-handlers fast-path + from the `Hook.triggerSync` section above already landed, the per-page and per-node + dispatches then return `undefined` immediately and + callers skip their reducer `forEach`. + +To enable (3), the `Handler` base class gets a small +addition: each `(hook, bound)` pair from auto-registration +is stashed under its hook name on `this._registered`, and a +new `_unregisterAll(except)` method splices each entry back +out. The `except` argument lets the caller skip the hook +it's currently inside (`afterParsed` in this case) -- +splicing the array we're iterating would cause the +surrounding `trigger()` loop to skip a sibling handler.
+The skipped entry stays in `this._registered` forever, but +it's a one-shot anyway: harmless. + +`Footnotes.afterParsed` then becomes: + +```js +afterParsed(parsed) { + this.processFootnotes(parsed, this.footnotes); + if (!parsed.querySelector("[data-note='footnote']")) { + this._unregisterAll("afterParsed"); + } +} +``` + +Profile delta (post-fast-path vs post-self-disable): + +| metric | post-fast-path | post-self-disable | Δ | +| ------ | -------------- | ----------------- | --- | +| render wall | 12.14 s | **11.77 s** | **-0.37 s** | +| samples | 22,433 | 21,809 | -624 | +| **`getBoundingClientRect` self** | **4,714 ms** | **4,198 ms** | **-516 ms** | +| `removeChild` self | 1,902 ms | 1,898 ms | flat | +| `(program)` self | 2,022 ms | 2,198 ms | +176 ms | +| `append` self | 76 ms | 69 ms | -7 ms | + +The 516 ms `getBoundingClientRect` drop is exactly the +`Footnotes.afterPageLayout` cost that the inventory +predicted -- one gBCR on `noteContent` plus two more in +the `new Layout(noteArea, ...)` constructor plus internal +gBCRs from `findOverflow()`, multiplied by ~1,650 pages. +The `(program)` row growing by 176 ms is V8 reattributing +work between native and self-time as the dispatch pattern +changes; not new work, just a different breakdown. + +PDF output remained byte-identical to the previous build +on this content (16.1 MB, same checksum on the raw +Chromium output). 
+ +### Cumulative effect + +Across all three landings: + +| metric | pre-investigation | post-self-disable | Δ | +| ------ | ----------------- | ----------------- | --- | +| render wall | 12.63 s | 11.77 s | **-0.86 s (-6.8 %)** | +| samples | 23,313 | 21,809 | -1,504 | +| `getBoundingClientRect` self | 4,825 ms | 4,198 ms | -627 ms | +| `removeChild` self | 1,954 ms | 1,898 ms | -56 ms | +| `removeOverflow` self | 636 ms | 568 ms | -68 ms | + +The `Handler._registered` + `_unregisterAll(except)` plumbing +is reusable: any future handler that determines at +parse/decoration time that it has nothing to do for a given +render can self-disable the same way, and the +empty-handlers fast-path will swallow the per-call dispatch +cost for free. That's the pattern this work leaves behind -- +combine "detect once at a known-quiet point" with "remove +yourself from the dispatch chain" and you pay zero +ongoing cost for inactive handlers. From 3ac5ec04cb37780dca3bbeb77d89076e04f94684 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 22 May 2026 00:16:19 +0200 Subject: [PATCH 21/35] Layout.append: cache last-seen (srcParent, dest) -> destParent across sibling calls. --- docs/lib/paged.browser.js | 42 ++++++++++++++- perf/README.md | 104 +++++++++++++++++++++++++++++++++++--- 2 files changed, 138 insertions(+), 8 deletions(-) diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 57f89ace..54898620 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -1376,6 +1376,18 @@ this.settings = options || {}; this.maxChars = this.settings.maxChars || MAX_CHARS_PER_BREAK; + + // [PATCH: parent-lookup-cache] One-entry memo of the last + // (sourceParent, dest) -> destParent resolution from append(). + // Consecutive siblings in the source tree all resolve to the + // same destParent, so caching the previous result lets the + // per-call findElement / indexOfRefs lookup short-circuit. 
+ // Invalidated at the start of every renderTo; safe within a + // single renderTo loop because append() never detaches DOM + // from dest (removeOverflow only fires after the loop exits). + this._lastSrcParent = null; + this._lastDest = null; + this._lastDestParent = null; this.forceRenderBreak = false; } @@ -1384,6 +1396,15 @@ // removes the per-page Promise allocation that was returned from // page.layout / chunker.layout up the chain. renderTo(wrapper, source, breakToken, bounds = this.bounds) { + // [PATCH: parent-lookup-cache] Invalidate the per-Layout + // parent memo. The previous renderTo on this Layout instance + // (same Page, multiple renderTo calls) may have run + // findBreakToken -> removeOverflow at exit, leaving the + // cached destParent detached. + this._lastSrcParent = null; + this._lastDest = null; + this._lastDestParent = null; + let start = this.getStart(source, breakToken); let walker = walk$2(start, source); @@ -1607,7 +1628,16 @@ let clone = cloneNode(node, !shallow); if (node.parentNode && isElement(node.parentNode)) { - let parent = findElement(node.parentNode, dest); + const srcParent = node.parentNode; + // [PATCH: parent-lookup-cache] Consecutive sibling appends + // share the same source parent; reuse the prior result + // instead of walking dest.indexOfRefs again. + let parent; + if (srcParent === this._lastSrcParent && dest === this._lastDest) { + parent = this._lastDestParent; + } else { + parent = findElement(srcParent, dest); + } // Rebuild chain if (parent) { parent.appendChild(clone); @@ -1639,6 +1669,16 @@ dest.appendChild(clone); } + // [PATCH: parent-lookup-cache] Cache the resolved (or + // rebuilt-and-attached) parent so the next sibling can + // skip the lookup. Skip on the no-rebuild fall-through + // where parent stayed null -- a later call with the same + // srcParent should still attempt the lookup. 
+ if (parent) { + this._lastSrcParent = srcParent; + this._lastDest = dest; + this._lastDestParent = parent; + } } else { dest.appendChild(clone); diff --git a/perf/README.md b/perf/README.md index 75133ccf..8f9d2c05 100644 --- a/perf/README.md +++ b/perf/README.md @@ -3378,17 +3378,107 @@ PDF output remained byte-identical to the previous build on this content (16.1 MB, same checksum on the raw Chromium output). +### `Layout.append` parent-lookup cache + +When the source walker emits consecutive children of the +same parent, `findElement(node.parentNode, dest)` in +`append()` gets called repeatedly with the same input. +For a parent with N children that's N - 1 redundant +lookups -- each one cheap (`getAttribute("data-ref")` + +`dest.indexOfRefs[ref]` is an O(1) dict hit on the fast +path), but the call count is north of 100k per render. + +Patch: a three-property memo on `Layout` -- last +`srcParent`, last `dest`, last `destParent`. Hit check at +the top of `append`, writeback at the bottom after the +parent is resolved (whether via direct lookup or via the +rebuild-ancestors branch, since the rebuild attaches the +cloned ancestor into `dest`). + +Invalidation: reset all three at the top of every +`renderTo`. The cache is safe within a single `renderTo` +loop because `append()` never detaches DOM from `dest`, +and `removeOverflow` (the one thing that does) only fires +at loop exit. Across `renderTo` calls on the same `Layout` +instance the previous run's `removeOverflow` may have +detached the cached parent, so the explicit reset is the +correctness guard. 
+ +Profile delta (post-self-disable vs post-parent-cache): + +| metric | post-self-disable | post-parent-cache | Δ | +| ------ | ----------------- | ----------------- | --- | +| render wall | 11.77 s | 11.72 s | flat (within noise) | +| samples | 21,809 | 21,688 | -121 (~65 ms) | +| `(program)` self | 2,198 ms | 2,169 ms | -29 ms | +| `getAttribute` (native) | 43 ms | off-list (<40 ms) | -3 ms+ | +| `querySelector` (native) | 63 ms | 59 ms | -4 ms | +| `Layout.append` self | 69 ms | 70 ms | flat | + +Order ~50-100 ms saved depending on the row chosen, fully +below the run-to-run wall-clock noise band but visible in +the cpuprofile rows. The math checks: ~100k append calls +× ~80 % sibling-cache-hit rate × ~1 us per skipped +findElement ≈ 80 ms. + +PDF output byte-identical. + +### What didn't land: the `_ref` expando + +One sibling candidate to the parent-lookup cache was +tried and reverted. The idea: mirror `data-ref` onto a +plain JS property `_ref` at decoration time (in +`ContentParser.addRefs`), propagate via the `cloneNode` +helper, and read it in `findElement` and `append`'s +postlude instead of `getAttribute("data-ref")` / +`clone.dataset.ref`. Both reads in the hot path become +plain JS property loads instead of going through C++ DOM +attribute fetches or the `DOMStringMap` proxy. + +Measured win on the per-row breakdown: + +- `Layout.append` self 69 -> 47 ms (-22 ms). +- `getAttribute` native 43 ms -> off-list (-3+ ms). + +About 25 ms of real per-call work removed. Reverted: the +saving is genuinely smaller than the diff's surface -- +`cloneNode` helper has to propagate an extra property, +the `data-ref` attribute has to stay for CSS selectors +and the `querySelector` fallback in `findRef`, `findElement` +needs a `||` fallback to keep direct `.cloneNode()` +callers in `rebuildAncestors` working unchanged, and any +future code that wants the ref has two places it could +read from. 
Not worth maintaining for a saving that +doesn't move single-run wall-clock. + +Lesson worth carrying forward: at this point in the +codebase, per-call findElement / `dataset.ref` work has +been ground down close enough to its floor that any +further shave produces savings in the 20-50 ms band, well +below the run-to-run wall-clock noise on this machine. +Reading the cpuprofile per-row deltas is the only way to +tell whether such a change is genuine; reading wall-clock +isn't. And the bar for landing scales with the size of +the diff -- the parent-cache landed because it's three +property writes and one branch; the expando didn't +because it's a propagation pattern that ripples through +the bundle. + ### Cumulative effect -Across all three landings: +Across all four landings: -| metric | pre-investigation | post-self-disable | Δ | +| metric | pre-investigation | post-parent-cache | Δ | | ------ | ----------------- | ----------------- | --- | -| render wall | 12.63 s | 11.77 s | **-0.86 s (-6.8 %)** | -| samples | 23,313 | 21,809 | -1,504 | -| `getBoundingClientRect` self | 4,825 ms | 4,198 ms | -627 ms | -| `removeChild` self | 1,954 ms | 1,898 ms | -56 ms | -| `removeOverflow` self | 636 ms | 568 ms | -68 ms | +| render wall | 12.63 s | 11.72 s | **-0.91 s (-7.2 %)** | +| samples | 23,313 | 21,688 | -1,625 | +| `getBoundingClientRect` self | 4,825 ms | 4,194 ms | -631 ms | +| `removeChild` self | 1,954 ms | 1,897 ms | -57 ms | +| `removeOverflow` self | 636 ms | 583 ms | -53 ms | +| `getAttribute` (native) | ~125 ms* | off-list (<40 ms) | -85 ms+ | + +\* Inferred from the post-tojson baseline rank; not +explicitly tabulated in the top-25 cut at that time. 
The `Handler._registered` + `_unregisterAll(except)` plumbing is reusable: any future handler that determines at From d37d191447fe7a00f960693d402459baecbe42e3 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 22 May 2026 00:52:27 +0200 Subject: [PATCH 22/35] Layout.wrapContent: skip the innerHTML round-trip (~870ms saved). --- docs/lib/paged.browser.js | 43 ++++++++--- perf/README.md | 157 ++++++++++++++++++++++++++++++++++++++ perf/find-callees.mjs | 69 +++++++++++++++++ perf/grep-profile.mjs | 37 +++++++++ 4 files changed, 296 insertions(+), 10 deletions(-) create mode 100644 perf/find-callees.mjs create mode 100644 perf/grep-profile.mjs diff --git a/docs/lib/paged.browser.js b/docs/lib/paged.browser.js index 54898620..d41f94aa 100644 --- a/docs/lib/paged.browser.js +++ b/docs/lib/paged.browser.js @@ -32929,16 +32929,39 @@ let template; template = body.querySelector(":scope > template[data-ref='pagedjs-content']"); - if (!template) { - // Otherwise create one - template = document.createElement("template"); - template.dataset.ref = "pagedjs-content"; - template.innerHTML = body.innerHTML; - body.innerHTML = ""; - body.appendChild(template); - } - - return template.content; + if (template) { + // [PATCH: wrap-content-move] Re-entrant call: the fragment we + // returned previously was stashed on the marker template's + // `_pagedjsContent` expando (template.content stays empty under + // the move strategy below). + return template._pagedjsContent || template.content; + } + + // [PATCH: wrap-content-move] Move children into a plain + // DocumentFragment owned by the live document instead of round- + // tripping through innerHTML (serialise the entire body to a + // string, reparse into a template). The round-trip is O(document + // size) twice over; the move is one O(n) detach/attach pass with + // no string work. 
+ // + // Why a plain DocumentFragment and not template.content: a + // template's content fragment is owned by the inert "template + // contents owner document", and moving live elements into + // it triggers adoptNode which runs the spec's "update the image + // data" algorithm. That resets .complete and leaves the source + // image in a state where later cloning into the live page wrapper + // doesn't synchronously cache-hit -- our sync waitForImages check + // then throws. A plain fragment stays in the live document so + // adoption is a no-op and image state is preserved. + let fragment = document.createDocumentFragment(); + while (body.firstChild) { + fragment.appendChild(body.firstChild); + } + template = document.createElement("template"); + template.dataset.ref = "pagedjs-content"; + template._pagedjsContent = fragment; + body.appendChild(template); + return fragment; } removeStyles(doc=document) { diff --git a/perf/README.md b/perf/README.md index 8f9d2c05..92de0ddf 100644 --- a/perf/README.md +++ b/perf/README.md @@ -68,6 +68,9 @@ DevTools-compatible trace is a few lines. | `compare-outlines.mjs` | Diffs two PDFs' `/Outlines` trees by `(depth, title, target page)`. Used to verify whether Chrome's native outline matches the injected one. | | `probe-outline-exclusions.mjs` | Tests which per-element attributes / styles (aria-hidden, role=presentation, hidden, display:none, CSS bookmark-level, ...) make Chrome drop a heading from its outline. | | `analyze-profile.mjs` | Bottom-up self-time analyzer for `.cpuprofile` files. Same shape as DevTools' Performance bottom-up view, in the terminal. | +| `find-callers.mjs` | "Who paid for this callee's time?" -- walks a `.cpuprofile` and attributes a target function's total time back to each direct caller. Used throughout the post-mortems to detect gBCR migration between callers. | +| `find-callees.mjs` | The other direction of `find-callers.mjs`: splits a function's self+descendant time across its direct callees. 
Surfaces the cases where V8 has rolled native DOM work back into the calling JS frame (Range deletion in `removeOverflow`, HTML parser in `wrapContent`). | +| `grep-profile.mjs` | Lists every node in a `.cpuprofile` whose `functionName` matches a regex, with self-time and location. Quick check for "is this frame in the profile at all, and what's it called?" | | `run.bat` | Windows wrapper. Installs deps on first run, then invokes `node measure.mjs`. | | `results/` | Output, one timestamped subfolder per run. Git-ignored. | @@ -3489,3 +3492,157 @@ cost for free. That's the pattern this work leaves behind -- combine "detect once at a known-quiet point" with "remove yourself from the dispatch chain" and you pay zero ongoing cost for inactive handlers. + +## Skipping the `wrapContent` innerHTML round-trip + +The post-append-cache profile's 5th-largest JS row was +`wrapContent` at 260 ms. It's called once per render, right +at the top of `Chunker.flow`, so unlike the previous fixes it +has no per-page hot path -- the absolute size is the whole +story. + +`Layout.wrapContent` lifts the entire `<body>` into a +`