diff --git a/docs/lib/fast-array-onebuf.mjs b/docs/lib/fast-array-onebuf.mjs new file mode 100644 index 0000000..5ed833c --- /dev/null +++ b/docs/lib/fast-array-onebuf.mjs @@ -0,0 +1,340 @@ +// One-buffer PDFArray: every committed element lives in a single +// append-only JS Array (arrayMain), kept for the document's lifetime. +// Mirror of fast-dict-onebuf's strategy applied to PDFArray. Backing +// is a plain heterogeneous JS Array -- slots hold the original +// PDFObject references directly. No encoding, no decode on read; the +// hot path is `arrayMain[start + i]`. +// +// Phase 3 of fast-dict-encoded did the same range-view refactor on +// PDFArray but used a Float64Array + encoded slots (mirroring its +// dict shape). The encoded backing cost ~300 ms of decodeValue +// dispatch during save (PDFArray.copyBytesInto iterates ~500 k +// elements). This shim keeps the heap win (~19 MB on the book by +// removing each PDFArray's per-instance `this.array = []`) without +// paying the decode cost: slots are JS references, reads are direct. +// +// 40-bit packed Number layout (well within Number.MAX_SAFE_INTEGER): +// bits 0-23: start (24 bits, max 16 M slots in arrayMain) +// bits 24-39: length (16 bits, max 65 535 elements; max observed +// ~25 k on the book) +// bits 40-52: spare (13 bits) +// +// Recursion. parseArray pushes elements onto a per-parser _arrayTemp; +// inner parseArray invocations append on top, commit their frame to +// arrayMain in one append, and pop temp back. Inner / outer ranges +// in arrayMain do not overlap. _arrayTemp is independent of +// fast-dict-onebuf's _dictTemp so dict <-> array recursion is fine. 
+//
+// Mutations:
+//   - set(i, v): in-place replace (safe; no length change)
+//   - push(v) at HWM: in-place extend (no other arrays follow)
+//   - push(v) not at HWM: COW the range to tail, then push
+//   - insert / remove: always COW (shifts would corrupt neighbours)
+// Same at-HWM-determines-safety logic as fast-dict-onebuf; no owned
+// bit needed (see fast-dict-onebuf commit 7e8b1f7).
+//
+// Singleton PDFContext (one PDFDocument.load per process in our
+// pipeline). The singleton is duplicated rather than shared with
+// fast-dict-onebuf -- the mechanism is ten lines and keeping each
+// shim independently injectable is worth more than dedup'ing it.
+// Both shims end up holding references to the same PDFContext.
+//
+// Composes with --fast-dict-onebuf. Mutually exclusive with
+// --fast-dict-encoded (which subsumes both via its own encoded shape).
+
+import { createRequire } from 'node:module';
+
+const require = createRequire(import.meta.url);
+const PDFArray = require('pdf-lib/cjs/core/objects/PDFArray.js').default;
+const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default;
+const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default;
+
+// ---- The single buffer ---------------------------------------------
+
+// Pre-sized to total array slots + slack on the book. Other workloads
+// grow it naturally from this starting size. When the measure-pass
+// shim runs first, it calls setExpectedArraySlots() before parse,
+// which resizes `arrayMain` to exact measured demand via
+// `arrayMain.length = N`.
+const ARRAY_MAIN_INITIAL_CAP = 800000;
+const arrayMain = new Array(ARRAY_MAIN_INITIAL_CAP);
+// Number of slots of arrayMain written so far (the high-water mark,
+// "HWM"). Every append helper below advances it; nothing decreases it.
+let arrayMainLen = 0;
+
+export { arrayMain };
+// Returns the current high-water mark (slots of arrayMain in use).
+export function getArrayMainLen() { return arrayMainLen; }
+
+// Resize arrayMain in place. Must be called before any parseArray /
+// withContext (i.e. while arrayMainLen is still 0). `slack` is a
+// multiplier on `slots`; default 1.0 (exact). Same in-place-resize
+// rationale as fast-dict-onebuf's setExpectedDictSlots: reassigning
+// the module-level binding invalidates V8's inline-cache slots in
+// every closure that reads it, and the deopt + recompile shows up as
+// a parse-time allocation spike.
+//
+// Throws an Error if anything has already been appended
+// (arrayMainLen > 0). The new length is Math.ceil(slots * slack);
+// the arguments are not validated beyond that, so a non-finite or
+// negative product surfaces as the engine's own array-length error.
+export function setExpectedArraySlots(slots, slack = 1.0) {
+  if (arrayMainLen > 0) {
+    throw new Error(
+      `fast-array-onebuf: setExpectedArraySlots called after parse started (arrayMainLen=${arrayMainLen})`,
+    );
+  }
+  arrayMain.length = Math.ceil(slots * slack);
+}
+
+// ---- Bit-packing helpers -------------------------------------------
+
+const POW_24 = 16777216; // 2^24
+const MASK_24 = 0xFFFFFF;
+const MASK_16 = 0xFFFF;
+
+const MAX_START = POW_24; // exclusive
+const MAX_LENGTH = 1 << 16; // 65 536, exclusive
+
+// Combine (start, length) into the single Number stored as `d` on
+// each PDFArray: start in the low 24 bits, length above it. Throws
+// when either value is at or above its exclusive upper bound. Only
+// the upper bounds are checked; callers must not pass negatives.
+function pack(start, length) {
+  if (start >= MAX_START) throw new Error(`fast-array-onebuf: start ${start} exceeds 24-bit budget`);
+  if (length >= MAX_LENGTH) throw new Error(`fast-array-onebuf: length ${length} exceeds 16-bit budget`);
+  return start + length * POW_24;
+}
+
+// Extract the start slot (low 24 bits) from a packed `d`.
+function _start(d) { return d & MASK_24; }
+// Extract the element count from a packed `d`. Uses a division by
+// 2^24 rather than a shift because `d` can be wider than 32 bits.
+function _length(d) { return Math.floor(d / POW_24) & MASK_16; }
+
+// ---- Singleton context ---------------------------------------------
+
+let _singletonContext = null;
+
+// Record `ctx` as the one context for this process. The first call
+// stores it; later calls are no-ops for the same object and throw for
+// a different one.
+function _registerContext(ctx) {
+  if (_singletonContext === null) {
+    _singletonContext = ctx;
+  } else if (_singletonContext !== ctx) {
+    throw new Error('fast-array-onebuf: expected a singleton PDFContext, got a second distinct one.');
+  }
+}
+
+// ---- Append + COW helpers ------------------------------------------
+
+// Copy `lenSlots` items of `temp`, starting at `fromOffset`, onto the
+// tail of arrayMain and advance the high-water mark. `temp` itself is
+// left untouched (the caller rewinds its own cursor).
+function _appendFromTemp(temp, fromOffset, lenSlots) {
+  for (let i = 0; i < lenSlots; i++) {
+    arrayMain[arrayMainLen + i] = temp[fromOffset + i];
+  }
+  arrayMainLen += lenSlots;
+}
+
+// Copy every element of the plain JS array `arr` onto the tail of
+// arrayMain and advance the high-water mark.
+function _appendArray(arr) {
+  const len = arr.length;
+  for (let i = 0; i < len; i++) arrayMain[arrayMainLen + i] = arr[i];
+  arrayMainLen += len;
+}
+
+// COW: copy this array's range to arrayMain's tail. If already at
+// the HWM, nothing to copy -- return d unchanged.
+//
+// Returns the packed descriptor for the (possibly relocated) range.
+// Does NOT assign it to `pa.d`; the caller is responsible for that.
+// The old range is left in place and simply no longer referenced.
+function _cow(pa) {
+  const d = pa.d;
+  const start = _start(d);
+  const length = _length(d);
+  if (start + length === arrayMainLen) return d; // at HWM
+  const newStart = arrayMainLen;
+  for (let i = 0; i < length; i++) arrayMain[arrayMainLen + i] = arrayMain[start + i];
+  arrayMainLen += length;
+  return pack(newStart, length);
+}
+
+// ---- Construction --------------------------------------------------
+//
+// Use a plain-function constructor (`_FastArray`) with the prototype
+// aliased to PDFArray.prototype instead of `Object.create + writes`.
+// Same shape change fast-refs-class and fast-dict-onebuf made: V8
+// gives `new`-built instances a stable hidden class from the first
+// instance and drops per-instance cost vs the slow-property path
+// taken by Object.create + later property writes.
+//
+// No subclass dispatch needed -- PDFArray has no subclasses in
+// pdf-lib (unlike PDFDict's PDFCatalog / PDFPageTree / PDFPageLeaf).
+
+// Minimal constructor: the packed (start, length) descriptor is the
+// only per-instance state.
+function _FastArray(d) { this.d = d; }
+_FastArray.prototype = PDFArray.prototype;
+
+// Build a PDFArray view over an already-written range of arrayMain.
+// Registers `ctx` as the singleton context first (throws on a second,
+// distinct context).
+function _makeFromRange(start, length, ctx) {
+  _registerContext(ctx);
+  return new _FastArray(pack(start, length));
+}
+
+// Append the elements of the plain JS array `arr` to arrayMain's tail
+// and return a PDFArray view over the freshly written range.
+function _makeFromAppend(arr, ctx) {
+  const start = arrayMainLen;
+  _appendArray(arr);
+  return _makeFromRange(start, arr.length, ctx);
+}
+
+// Normalise a caller-supplied position the way Array.prototype.splice
+// normalises its `start` argument, so insert / remove keep the
+// semantics of the splice-backed methods they replace: the value is
+// truncated to an integer (NaN / undefined -> 0), a negative value
+// counts back from the end, and the result is clamped to [0, length].
+function _spliceIndex(index, length) {
+  let i = Math.trunc(index);
+  if (Number.isNaN(i)) i = 0;
+  if (i < 0) {
+    i += length;
+    return i < 0 ? 0 : i;
+  }
+  return i > length ? length : i;
+}
+
+if (!PDFArray.prototype.__fastArrayOnebufInstalled) {
+
+  // ---- PDFArray.prototype -----------------------------------------
+
+  // Number of elements in this array (decoded from `d`).
+  PDFArray.prototype.size = function () {
+    return _length(this.d);
+  };
+
+  // Append one element. If this array's range ends exactly at the
+  // high-water mark it is extended in place; otherwise the range is
+  // first copied to the tail (_cow) so the new slot is contiguous.
+  PDFArray.prototype.push = function (object) {
+    const d0 = this.d;
+    const start0 = _start(d0);
+    const length0 = _length(d0);
+    let dNow = d0;
+    if (start0 + length0 !== arrayMainLen) {
+      dNow = _cow(this);
+    }
+    arrayMain[arrayMainLen++] = object;
+    const start = _start(dNow);
+    this.d = pack(start, length0 + 1);
+  };
+
+  // Direct slot read. NOTE: `index` is not bounds-checked; an index
+  // outside [0, size()) addresses a slot outside this array's range.
+  PDFArray.prototype.get = function (index) {
+    return arrayMain[_start(this.d) + index];
+  };
+
+  // Direct slot write. NOTE: `index` is not bounds-checked (see get);
+  // callers must stay inside [0, size()).
+  PDFArray.prototype.set = function (index, object) {
+    arrayMain[_start(this.d) + index] = object;
+  };
+
+  // Position of the first element strictly equal to `object`, or
+  // undefined (not -1) when absent.
+  PDFArray.prototype.indexOf = function (object) {
+    const d = this.d;
+    const start = _start(d);
+    const length = _length(d);
+    for (let i = 0; i < length; i++) {
+      if (arrayMain[start + i] === object) return i;
+    }
+    return undefined;
+  };
+
+  // Insert `object` before position `index`.
+  //
+  // Always COW -- shifting elements in place would corrupt other
+  // arrays' ranges past this one.
+  //
+  // `index` is normalised with splice semantics (_spliceIndex). The
+  // previous code used it raw, so an index beyond the length copied
+  // slots belonging to neighbouring arrays into this one and packed a
+  // length that did not match what had been written.
+  PDFArray.prototype.insert = function (index, object) {
+    const d0 = this.d;
+    const start0 = _start(d0);
+    const length0 = _length(d0);
+    const at = _spliceIndex(index, length0);
+    const newStart = arrayMainLen;
+    // Pack first: if the new range does not fit the bit budget this
+    // throws before arrayMain / arrayMainLen have been touched.
+    const dNew = pack(newStart, length0 + 1);
+    for (let i = 0; i < at; i++) {
+      arrayMain[arrayMainLen++] = arrayMain[start0 + i];
+    }
+    arrayMain[arrayMainLen++] = object;
+    for (let i = at; i < length0; i++) {
+      arrayMain[arrayMainLen++] = arrayMain[start0 + i];
+    }
+    this.d = dNew;
+  };
+
+  // Remove the element at position `index`.
+  //
+  // Always COW (same reason as insert).
+  //
+  // `index` is normalised with splice semantics. When it resolves to
+  // a position at or past the end there is nothing to remove and the
+  // call is a no-op; the previous code silently dropped the last
+  // element in that case (and packed length -1 on an empty array).
+  PDFArray.prototype.remove = function (index) {
+    const d0 = this.d;
+    const start0 = _start(d0);
+    const length0 = _length(d0);
+    const at = _spliceIndex(index, length0);
+    if (at >= length0) return;
+    const newStart = arrayMainLen;
+    for (let i = 0; i < length0; i++) {
+      if (i === at) continue;
+      arrayMain[arrayMainLen++] = arrayMain[start0 + i];
+    }
+    this.d = pack(newStart, length0 - 1);
+  };
+
+  // Fresh plain JS array holding this array's elements in order.
+  PDFArray.prototype.asArray = function () {
+    const d = this.d;
+    const start = _start(d);
+    const length = _length(d);
+    const out = new Array(length);
+    for (let i = 0; i < length; i++) out[i] = arrayMain[start + i];
+    return out;
+  };
+
+  // Shallow copy: the element references are duplicated onto the tail
+  // of arrayMain and a new view over that range is returned. Falls
+  // back to the registered singleton when `context` is not given.
+  PDFArray.prototype.clone = function (context) {
+    const d = this.d;
+    const start = _start(d);
+    const length = _length(d);
+    const newStart = arrayMainLen;
+    for (let i = 0; i < length; i++) arrayMain[arrayMainLen + i] = arrayMain[start + i];
+    arrayMainLen += length;
+    _registerContext(context || _singletonContext);
+    return new _FastArray(pack(newStart, length));
+  };
+
+  // "[ e0 e1 ... ]" using each element's own toString().
+  PDFArray.prototype.toString = function () {
+    const d = this.d;
+    const start = _start(d);
+    const length = _length(d);
+    let s = '[ ';
+    for (let i = 0; i < length; i++) s += arrayMain[start + i].toString() + ' ';
+    return s + ']';
+  };
+
+  // Serialized size: 3 bytes for "[ " and "]" plus, per element, its
+  // own size and one separating space. Must agree with copyBytesInto.
+  PDFArray.prototype.sizeInBytes = function () {
+    const d = this.d;
+    const start = _start(d);
+    const end = start + _length(d);
+    let size = 3;
+    for (let i = start; i < end; i++) size += arrayMain[i].sizeInBytes() + 1;
+    return size;
+  };
+
+  // Write "[ e0 e1 ... ]" into `buffer` starting at `offset`; returns
+  // the number of bytes written.
+  PDFArray.prototype.copyBytesInto = function (buffer, offset) {
+    const initialOffset = offset;
+    buffer[offset++] = CharCodes.LeftSquareBracket;
+    buffer[offset++] = CharCodes.Space;
+    const d = this.d;
+    const start = _start(d);
+    const end = start + _length(d);
+    for (let i = start; i < end; i++) {
+      offset += arrayMain[i].copyBytesInto(buffer, offset);
+      buffer[offset++] = CharCodes.Space;
+    }
+    buffer[offset++] = CharCodes.RightSquareBracket;
+    return offset - initialOffset;
+  };
+
+  // lookup, lookupMaybe, asRectangle, scalePDFNumbers stay on the
+  // upstream prototype -- they call this.get / this.size / this.set
+  // and dispatch through our overrides.
+
+  // `context` is served from the module-level singleton; assignments
+  // are accepted and ignored so code that writes the property keeps
+  // working.
+  Object.defineProperty(PDFArray.prototype, 'context', {
+    get() { return _singletonContext; },
+    set(_ctx) { /* singleton is source of truth */ },
+    configurable: true,
+  });
+
+  // ---- PDFArray factory -------------------------------------------
+
+  // New empty array: a zero-length view positioned at the current
+  // high-water mark.
+  PDFArray.withContext = function (context) {
+    return _makeFromAppend([], context);
+  };
+
+  // ---- PDFObjectParser.prototype.parseArray -----------------------
+  //
+  // Same temp/commit pattern as fast-dict-onebuf's parseDict:
+  // each parser instance carries its own _arrayTemp + length cursor;
+  // parseArray pushes elements onto temp's tail, commits the frame
+  // to arrayMain in one contiguous append, pops temp back to
+  // frameStart, returns a PDFArray view into arrayMain.
+
+  PDFObjectParser.prototype.parseArray = function fastParseArrayOneBuf() {
+    const bytes = this.bytes;
+    bytes.assertNext(CharCodes.LeftSquareBracket);
+    this.skipWhitespaceAndComments();
+
+    if (this._arrayTemp === undefined) {
+      this._arrayTemp = new Array(64); // grows naturally if needed
+      this._arrayTempLen = 0;
+    }
+    const temp = this._arrayTemp;
+    const frameStart = this._arrayTempLen;
+
+    while (bytes.peek() !== CharCodes.RightSquareBracket) {
+      const element = this.parseObject(); // may recurse
+      temp[this._arrayTempLen++] = element;
+      this.skipWhitespaceAndComments();
+    }
+    bytes.assertNext(CharCodes.RightSquareBracket);
+
+    const frameLen = this._arrayTempLen - frameStart;
+    const start = arrayMainLen;
+    _appendFromTemp(temp, frameStart, frameLen);
+    this._arrayTempLen = frameStart;
+
+    return _makeFromRange(start, frameLen, this.context);
+  };
+
+  PDFArray.prototype.__fastArrayOnebufInstalled = true;
+}
diff --git a/docs/lib/fast-decode-name.mjs b/docs/lib/fast-decode-name.mjs
new file mode 100644
index 0000000..0f20a9f
--- /dev/null
+++ b/docs/lib/fast-decode-name.mjs
@@ -0,0 +1,70 @@
+// Skip pdf-lib's decodeName regex scan when the input has no `#`.
+// +// The upstream PDFName.of +// ([PDFName.js:100](node_modules/pdf-lib/cjs/core/objects/PDFName.js:100)) +// is the gatekeeper for every PDFName instance the parser builds: +// +// PDFName.of = function (name) { +// var decodedValue = decodeName(name); // <-- always runs +// var instance = pool.get(decodedValue); +// if (!instance) { ... } +// return instance; +// }; +// +// and decodeName at line 9 is: +// +// name.replace(/#([\dABCDEF]{2})/g, function (_, hex) { ... }) +// +// PDF spec (ISO 32000-1 §7.3.5) requires `#XX` hex-escape for any +// byte outside printable-ASCII or for delimiters / whitespace. In +// real PDFs almost no names use it. Instrumenting on the book: +// +// PDFName.of calls : 2,759,635 +// raw input has # char : 2 (0.000%) +// +// So decodeName runs a regex scan against 2.76 M strings to find a +// `#` that's only there twice in the whole load. Profile attributes +// ~168 ms (7 %) of process self-time to this function. +// +// Shim: a parallel Map keyed by the raw `name` +// argument. When `name` contains no `#`, decoded form equals raw +// form, so our key matches pdf-lib's internal pool key and a hit +// returns the deduped instance with zero regex work. Misses +// delegate to the original (which does the regex scan once and +// stores the instance in pdf-lib's pool); we cache the result so +// every subsequent occurrence of the same name hits our fast path. +// +// Names containing `#` fall through to the original unchanged -- +// the correctness path (e.g. uppercase-only regex, lowercase escapes +// silently un-decoded) is preserved exactly. +// +// Mechanism: PDFName is re-exported from pdf-lib's index, so we can +// patch PDFName.of directly without reaching into CJS internals. +// Static initializers (PDFName.Length, .FlateDecode, ...) ran when +// pdf-lib's module body executed -- before this shim imports -- so +// pdf-lib's pool is already populated with the canonical instances +// the parser will see. 
+//
+// Side-effecting import. Import once before any pdf-lib operation:
+//
+//   import "./lib/fast-decode-name.mjs";
+//
+// Idempotent -- repeated imports do nothing after the first.
+
+import { PDFName } from "pdf-lib";
+
+if (!PDFName.__fastDecodeNameInstalled) {
+  const upstreamOf = PDFName.of;
+  // Raw (un-decoded) name string -> pooled PDFName instance. Only
+  // names without `#` are ever stored here.
+  const byRawName = new Map();
+
+  PDFName.of = function fastOf(name) {
+    // Names carrying a `#` escape take the upstream path untouched
+    // and are never cached on this side.
+    if (name.indexOf("#") !== -1) return upstreamOf.call(PDFName, name);
+
+    const hit = byRawName.get(name);
+    if (hit) return hit;
+
+    // First sighting of this raw name: let upstream create / pool
+    // the instance, then remember it under the raw key.
+    const created = upstreamOf.call(PDFName, name);
+    byRawName.set(name, created);
+    return created;
+  };
+
+  PDFName.__fastDecodeNameInstalled = true;
+}
diff --git a/docs/lib/fast-dict-array.mjs b/docs/lib/fast-dict-array.mjs
new file mode 100644
index 0000000..5f70985
--- /dev/null
+++ b/docs/lib/fast-dict-array.mjs
@@ -0,0 +1,328 @@
+// Replace PDFDict's backing Map with a flat alternating array
+// [k0, v0, k1, v1, ...].
+//
+// Motivation. The sampling heap profile of the process phase (see
+// "Profiling pdf-lib heap allocation" in perf/README.md) put `Map`
+// constructors and `Map.prototype.set` at 50 % of total allocations
+// -- ~63 MB combined -- with ~80 % of that traffic coming from one
+// site: fastParseDict's per-dict accumulator
+// ([fast-parse-dict.mjs:62](docs/lib/fast-parse-dict.mjs:62)).
+//
+//   const dict = new Map();             // 24 MB of Map() constructors
+//   while (...) {
+//     const key = this.parseName();
+//     const value = this.parseObject();
+//     dict.set(key, value);             // 38 MB of Map.set entries
+//   }
+//   ... PDFDict.fromMapWithContext(dict, this.context);
+//
+// Each parsed dict pays for one Map header + one hash-table backing
+// arena + one bucket allocation per entry. PDF dicts are tiny (typical
+// has <= 10 entries, often 2-3), so the hash-table overhead is pure
+// loss vs a linear scan -- and the Map's amortized O(1) lookup buys
+// nothing because nobody iterates a parsed dict enough times for the
+// hash to pay back.
+// +// The fix: store entries in a flat array. One allocation per dict +// (the array itself; the inline alternating layout avoids any per- +// entry bucket alloc). Lookup is a linear scan, which beats Map.get +// at this size class on every V8 microbench I've seen. +// +// Mechanism. We do three things: +// +// 1. Patch PDFDict.prototype.{keys, values, entries, set, get, has, +// delete, asMap, clone, toString, sizeInBytes, copyBytesInto} so +// `this.dict` is read as a flat array instead of a Map. +// sizeInBytes / copyBytesInto subsume fast-dict-iter.mjs (no +// Map.forEach + thisArg context object needed; iteration is just +// `for (let i = 0; i < arr.length; i += 2)`). +// +// 2. Patch PDFDict.withContext, PDFDict.fromMapWithContext, and the +// parallel fromMapWithContext / withContextAndPages helpers on +// PDFCatalog / PDFPageTree / PDFPageLeaf, plus PDFPageLeaf's +// clone() which constructs `new Map()` directly. Each of these is +// rewritten to produce / accept a flat array; the Map argument is +// converted at the seam (rare-path cost, only a few dicts per +// document hit these factories). +// +// 3. Patch PDFObjectParser.prototype.parseDict so the parser's hot +// inner loop accumulates into a flat array directly (no Map(), no +// Map.set). The Type-sentinel dispatch at the tail becomes a +// short linear scan over the array; on dicts that have a /Type +// entry it's the first or second key (PDF convention), so the +// scan is effectively O(1). This subsumes fast-parse-dict.mjs. +// +// Compatibility. Every consumer of `dict.dict.X` inside pdf-lib +// (ViewerPreferences, AppearanceCharacteristics, PDFAcroField, +// PDFAcroChoice, PDFAcroText, PDFAcroForm, PDFAnnotation, +// PDFWidgetAnnotation, BorderStyle, PDFStreamWriter, PDFCrossRefStream, +// PDFObjectCopier, PDFXRefStreamParser, etc.) goes through +// PDFDict.prototype methods (.set / .get / .has / .delete / .entries / +// .lookup), all of which we re-implement to read the array. 
Nobody in +// the codebase touches `dict.dict` expecting a Map iterator -- grep +// confirmed. `asMap()` still returns a fresh `new Map(...)` for any +// caller that genuinely wants a Map view. +// +// This shim is mutually exclusive with --fast-parse-dict and +// --fast-dict-iter: both are subsumed and would re-install the +// Map-based methods if loaded afterwards. measure.mjs enforces this. +// +// Side-effecting import. Import once before any pdf-lib operation: +// +// import "./lib/fast-dict-array.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. + +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const PDFDict = require('pdf-lib/cjs/core/objects/PDFDict.js').default; +const PDFCatalog = require('pdf-lib/cjs/core/structures/PDFCatalog.js').default; +const PDFPageTree = require('pdf-lib/cjs/core/structures/PDFPageTree.js').default; +const PDFPageLeaf = require('pdf-lib/cjs/core/structures/PDFPageLeaf.js').default; +const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default; +const PDFNull = require('pdf-lib/cjs/core/objects/PDFNull.js').default; +const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default; +const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default; + +// Captured canonical PDFNames for the parser's Type-dispatch tail. +// Pool-dedup ([PDFName.js:18,100]) guarantees reference equality with +// whatever the parser sees inside the dict. +const TypeName = PDFName.of('Type'); +const CatalogName = PDFName.of('Catalog'); +const PagesName = PDFName.of('Pages'); +const PageName = PDFName.of('Page'); + +// Map -> flat array. Called at the seam from the factories below; not +// on the hot parse path. 
+function mapToArray(map) {
+  // Two slots per entry: key at the even index, value right after it.
+  const flat = new Array(map.size * 2);
+  let slot = 0;
+  map.forEach((value, key) => {
+    flat[slot] = key;
+    flat[slot + 1] = value;
+    slot += 2;
+  });
+  return flat;
+}
+
+// Find `key` among the even (key) slots of [k0, v0, k1, v1, ...] by
+// strict equality. Returns the key's own slot index, or -1 when the
+// key is not present.
+function indexOfKey(arr, key) {
+  const end = arr.length;
+  let slot = 0;
+  while (slot < end) {
+    if (arr[slot] === key) return slot;
+    slot += 2;
+  }
+  return -1;
+}
+
+if (!PDFDict.prototype.__fastDictArrayInstalled) {
+
+  // ---- PDFDict.prototype --------------------------------------------
+
+  // Fresh array of the keys, in stored order.
+  PDFDict.prototype.keys = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    const result = new Array(end >> 1);
+    let dst = 0;
+    for (let src = 0; src < end; src += 2) result[dst++] = flat[src];
+    return result;
+  };
+
+  // Fresh array of the values, in stored order.
+  PDFDict.prototype.values = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    const result = new Array(end >> 1);
+    let dst = 0;
+    for (let src = 1; src < end; src += 2) result[dst++] = flat[src];
+    return result;
+  };
+
+  // Fresh array of [key, value] pairs, in stored order.
+  PDFDict.prototype.entries = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    const result = new Array(end >> 1);
+    let dst = 0;
+    for (let src = 0; src < end; src += 2) {
+      result[dst++] = [flat[src], flat[src + 1]];
+    }
+    return result;
+  };
+
+  // Overwrite the value of an existing key, or append a new pair.
+  PDFDict.prototype.set = function (key, value) {
+    const flat = this.dict;
+    const slot = indexOfKey(flat, key);
+    if (slot < 0) {
+      flat.push(key, value);
+      return;
+    }
+    flat[slot + 1] = value;
+  };
+
+  // Value stored under `key`, or undefined when absent. A stored
+  // PDFNull is reported as undefined unless `preservePDFNull` is set.
+  PDFDict.prototype.get = function (key, preservePDFNull) {
+    const keepNull = preservePDFNull === undefined ? false : preservePDFNull;
+    const flat = this.dict;
+    const slot = indexOfKey(flat, key);
+    if (slot < 0) return undefined;
+    const found = flat[slot + 1];
+    return (found === PDFNull && !keepNull) ? undefined : found;
+  };
+
+  // True only when `key` is present with a value that is neither
+  // undefined nor PDFNull.
+  PDFDict.prototype.has = function (key) {
+    const flat = this.dict;
+    const slot = indexOfKey(flat, key);
+    if (slot < 0) return false;
+    const found = flat[slot + 1];
+    return !(found === undefined || found === PDFNull);
+  };
+
+  // Remove the pair stored under `key`; returns whether one existed.
+  PDFDict.prototype.delete = function (key) {
+    const flat = this.dict;
+    const slot = indexOfKey(flat, key);
+    if (slot >= 0) {
+      flat.splice(slot, 2);
+      return true;
+    }
+    return false;
+  };
+
+  // Fresh Map populated from the flat array, in stored order.
+  PDFDict.prototype.asMap = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    const view = new Map();
+    for (let slot = 0; slot < end; slot += 2) view.set(flat[slot], flat[slot + 1]);
+    return view;
+  };
+
+  // Shallow copy as a plain PDFDict over a copy of the flat array.
+  PDFDict.prototype.clone = function (context) {
+    const target = context || this.context;
+    return new PDFDict(this.dict.slice(), target);
+  };
+
+  // "<<\nkey value\n...>>" using each member's own toString().
+  PDFDict.prototype.toString = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    let text = '<<\n';
+    for (let slot = 0; slot < end; slot += 2) {
+      text += `${flat[slot].toString()} ${flat[slot + 1].toString()}\n`;
+    }
+    return text + '>>';
+  };
+
+  // Serialized size: 5 bytes of framing plus, per entry, key size,
+  // value size and 2 separator bytes. Must agree with copyBytesInto.
+  PDFDict.prototype.sizeInBytes = function () {
+    const flat = this.dict;
+    const end = flat.length;
+    let total = 5;
+    for (let slot = 0; slot < end; slot += 2) {
+      total += flat[slot].sizeInBytes() + flat[slot + 1].sizeInBytes() + 2;
+    }
+    return total;
+  };
+
+  // Write "<<\nkey value\n...>>" into `buffer` starting at `offset`;
+  // returns the number of bytes written.
+  PDFDict.prototype.copyBytesInto = function (buffer, offset) {
+    const flat = this.dict;
+    const end = flat.length;
+    let cursor = offset;
+    buffer[cursor++] = CharCodes.LessThan;
+    buffer[cursor++] = CharCodes.LessThan;
+    buffer[cursor++] = CharCodes.Newline;
+    for (let slot = 0; slot < end; slot += 2) {
+      cursor += flat[slot].copyBytesInto(buffer, cursor);
+      buffer[cursor++] = CharCodes.Space;
+      cursor += flat[slot + 1].copyBytesInto(buffer, cursor);
+      buffer[cursor++] = CharCodes.Newline;
+    }
+    buffer[cursor++] = CharCodes.GreaterThan;
+    buffer[cursor++] = CharCodes.GreaterThan;
+    return cursor - offset;
+  };
+
+  // ---- PDFDict factories --------------------------------------------
+
+  // Empty dict backed by an empty flat array.
+  PDFDict.withContext = function (context) {
+    const empty = [];
+    return new PDFDict(empty, context);
+  };
+  // Dict built from a Map; the Map is flattened at this seam.
+  PDFDict.fromMapWithContext = function (map, context) {
+    const flat = mapToArray(map);
+    return new PDFDict(flat, context);
+  };
+
+  // ---- Subclass factories -------------------------------------------
+  // PDFCatalog.withContextAndPages builds a fresh 2-entry Map; just
+  // hand it
the equivalent 2-entry array.
+
+  // New catalog holding /Type /Catalog and /Pages <pages>, laid out
+  // directly as the flat [key, value, key, value] array. Uses the
+  // TypeName captured at module load, like the rest of this file,
+  // instead of a fresh PDFName.of('Type') pool lookup per call.
+  PDFCatalog.withContextAndPages = function (context, pages) {
+    return new PDFCatalog(
+      [TypeName, CatalogName, PagesName, pages],
+      context,
+    );
+  };
+  // Catalog built from a Map; the Map is flattened at this seam.
+  PDFCatalog.fromMapWithContext = function (map, context) {
+    return new PDFCatalog(mapToArray(map), context);
+  };
+
+  // Page tree built from a Map; the Map is flattened at this seam.
+  PDFPageTree.fromMapWithContext = function (map, context) {
+    return new PDFPageTree(mapToArray(map), context);
+  };
+
+  // Page leaf built from a Map; `autoNormalizeCTM` is forwarded to the
+  // constructor unchanged (including when it is undefined).
+  PDFPageLeaf.fromMapWithContext = function (map, context, autoNormalizeCTM) {
+    return new PDFPageLeaf(mapToArray(map), context, autoNormalizeCTM);
+  };
+  // PDFPageLeaf.prototype.clone constructs `new Map()` explicitly,
+  // then copies via this.entries() + clone.set(); since clone.set is
+  // PDFDict.prototype.set (now array-aware), it works as long as
+  // fromMapWithContext receives an empty Map and converts it.
+  // mapToArray(new Map()) yields []; nothing to patch here.
+  //
+  // NOTE(review): any other upstream factory that builds a Map and
+  // passes it straight to a constructor (rather than going through
+  // fromMapWithContext) would leave a Map in `this.dict`, which the
+  // array-based methods cannot read. PDFPageTree.withContext and
+  // PDFPageLeaf.withContextAndParent look like candidates -- confirm
+  // against the pdf-lib sources and patch them here if so.
+
+  // ---- PDFObjectParser.prototype.parseDict --------------------------
+  // Subsumes fast-parse-dict.mjs: no `new Map()`, no `dict.set(...)`
+  // in the hot inner loop. The Type-sentinel dispatch at the tail is
+  // a short linear scan; PDF convention places /Type first, so it's
+  // effectively O(1) per dict.
+
+  // Initial capacity for the per-dict accumulator. NOT a scratch
+  // buffer (the array isn't reused across calls -- it's allocated
+  // fresh each dict, filled with parsed entries, and handed to the
+  // PDFDict constructor where it lives as `pdfDict.dict` for the
+  // document's lifetime). Just a pre-sized initial capacity that
+  // skips push-grow's reallocation chain.
+  //
+  // Histogram from the book parse (see instrument-parsedict.mjs):
+  // 5-entry dicts dominate (52 %, exactly 10 push slots), 4-entry
+  // next (28 %, 8 slots), long tail to 7-8 entries. INITIAL_SLOTS =
+  // 10 is exact-fit for the median case; smaller dicts (2/3/4
+  // entries) waste a few slots, larger ones (7+) take one growth
+  // via push.
Cuts ~70 bytes of FixedArray-header allocation per
+  // dict vs INITIAL_SLOTS=16 -- on 261 k dict invocations that
+  // adds up.
+  const INITIAL_SLOTS = 10;
+
+  // Parse `<< /Key value ... >>` into a flat [k0, v0, k1, v1, ...]
+  // array and wrap it in PDFCatalog / PDFPageTree / PDFPageLeaf /
+  // PDFDict according to its /Type entry.
+  //
+  // Duplicate keys. The Map-based accumulator this replaces stored
+  // entries with dict.set(key, value), so a key that appeared twice
+  // kept its first position and took the last value. Appending the
+  // second pair blindly would instead leave two pairs in the array:
+  // every lookup would return the FIRST value and save would write
+  // the key twice. Each parsed key is therefore checked against the
+  // pairs collected so far (reference equality is enough -- PDFName
+  // instances are pooled) and overwrites in place when already
+  // present. The scan covers only the `len` slots filled so far.
+  PDFObjectParser.prototype.parseDict = function fastParseDictArray() {
+    const bytes = this.bytes;
+    bytes.assertNext(CharCodes.LessThan);
+    bytes.assertNext(CharCodes.LessThan);
+    this.skipWhitespaceAndComments();
+    const arr = new Array(INITIAL_SLOTS);
+    let len = 0;
+    while (!bytes.done() &&
+           bytes.peek() !== CharCodes.GreaterThan &&
+           bytes.peekAhead(1) !== CharCodes.GreaterThan) {
+      const key = this.parseName();
+      const value = this.parseObject();
+
+      // Has this key been seen earlier in the same dict?
+      let seenAt = -1;
+      for (let i = 0; i < len; i += 2) {
+        if (arr[i] === key) { seenAt = i; break; }
+      }
+
+      if (seenAt >= 0) {
+        // Repeated key: last value wins, position stays.
+        arr[seenAt + 1] = value;
+      } else {
+        if (len < INITIAL_SLOTS) {
+          arr[len] = key;
+          arr[len + 1] = value;
+        } else {
+          // Rare overflow path: set length to current len so push
+          // appends at the right offset, then grow naturally.
+          arr.length = len;
+          arr.push(key, value);
+        }
+        len += 2;
+      }
+      this.skipWhitespaceAndComments();
+    }
+    this.skipWhitespaceAndComments();
+    bytes.assertNext(CharCodes.GreaterThan);
+    bytes.assertNext(CharCodes.GreaterThan);
+    // Trim the unused pre-sized tail so arr.length is exactly the
+    // number of filled slots.
+    arr.length = len;
+
+    // Type-sentinel dispatch. Inline-scan for TypeName; in practice
+    // it's at arr[0] or arr[2].
+    let Type;
+    for (let i = 0; i < len; i += 2) {
+      if (arr[i] === TypeName) { Type = arr[i + 1]; break; }
+    }
+    if (Type === CatalogName) return new PDFCatalog(arr, this.context);
+    if (Type === PagesName) return new PDFPageTree(arr, this.context);
+    if (Type === PageName) return new PDFPageLeaf(arr, this.context);
+    return new PDFDict(arr, this.context);
+  };
+
+  PDFDict.prototype.__fastDictArrayInstalled = true;
+  // Mark the subsumed shims as installed so a redundant load is a no-op.
+ PDFDict.prototype.__fastDictIterInstalled = true; + PDFObjectParser.prototype.__fastParseDictInstalled = true; +} diff --git a/docs/lib/fast-dict-iter.mjs b/docs/lib/fast-dict-iter.mjs new file mode 100644 index 0000000..1d2a6cb --- /dev/null +++ b/docs/lib/fast-dict-iter.mjs @@ -0,0 +1,81 @@ +// Replace pdf-lib's PDFDict.sizeInBytes and PDFDict.copyBytesInto -- both of +// which materialize a fresh Array of [key, value] tuples via this.entries() +// on every call -- with versions that iterate the underlying Map in place. +// +// The upstream entries() helper +// ([PDFDict.js:22](node_modules/pdf-lib/cjs/core/objects/PDFDict.js:22)) is: +// +// PDFDict.prototype.entries = function () { +// return Array.from(this.dict.entries()); +// }; +// +// Per call that is: one MapIterator + one outer Array + one fresh +// [key, value] tuple per entry (allocated by the iterator itself). The save +// path fires both consumers on every dict (sizeInBytes to measure first, +// then copyBytesInto to write), so on the book that's ~100 k Array.from +// calls feeding the GC; PDFDict.entries was the largest non-GC row in the +// process profile (~10 % of process self-time) and (garbage collector) sat +// at the top. +// +// Map.prototype.forEach((value, key) => ...) calls back with positional +// arguments and never allocates a tuple. The two consumers don't need the +// tuple form -- they immediately destructure -- so swapping is local. +// +// We do NOT touch PDFDict.prototype.entries itself: clone() and toString() +// still call it and rely on the Array-of-tuples contract. Those paths fire +// rarely (clone on incremental updates only, toString in debug output) and +// aren't worth the contract churn. +// +// Side-effecting import. Import once before any pdf-lib save: +// +// import "./lib/fast-dict-iter.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. 
+
+import { createRequire } from 'node:module';
+
+const require = createRequire(import.meta.url);
+const PDFDict = require('pdf-lib/cjs/core/objects/PDFDict.js').default;
+const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default;
+
+// Both visitors are plain module-level functions handed to
+// Map.prototype.forEach together with a `thisArg` state object, so
+// the same function reference serves every call and the running
+// total / write cursor lives on `this` instead of in a captured
+// closure variable.
+
+// forEach visitor for sizeInBytes: adds one entry's key size, value
+// size and 2 separator bytes to `this.total`.
+function _sizeInBytesEntry(value, key) {
+  this.total += key.sizeInBytes() + value.sizeInBytes() + 2;
+}
+
+// forEach visitor for copyBytesInto: writes "key value\n" at
+// `this.cursor` in `this.target` and stores the advanced cursor back.
+function _copyBytesIntoEntry(value, key) {
+  const target = this.target;
+  let cursor = this.cursor;
+  cursor += key.copyBytesInto(target, cursor);
+  target[cursor] = CharCodes.Space;
+  cursor += 1;
+  cursor += value.copyBytesInto(target, cursor);
+  target[cursor] = CharCodes.Newline;
+  this.cursor = cursor + 1;
+}
+
+if (!PDFDict.prototype.__fastDictIterInstalled) {
+  // Serialized size of the dict: 5 framing bytes plus every entry.
+  PDFDict.prototype.sizeInBytes = function () {
+    const state = { total: 5 };
+    this.dict.forEach(_sizeInBytesEntry, state);
+    return state.total;
+  };
+
+  // Write "<<\n" + entries + ">>" into `buffer` at `offset`; returns
+  // the number of bytes written.
+  PDFDict.prototype.copyBytesInto = function (buffer, offset) {
+    let cursor = offset;
+    buffer[cursor++] = CharCodes.LessThan;
+    buffer[cursor++] = CharCodes.LessThan;
+    buffer[cursor++] = CharCodes.Newline;
+
+    const state = { target: buffer, cursor };
+    this.dict.forEach(_copyBytesIntoEntry, state);
+
+    cursor = state.cursor;
+    buffer[cursor++] = CharCodes.GreaterThan;
+    buffer[cursor++] = CharCodes.GreaterThan;
+    return cursor - offset;
+  };
+
+  PDFDict.prototype.__fastDictIterInstalled = true;
+}
diff --git a/docs/lib/fast-dict-onebuf.mjs b/docs/lib/fast-dict-onebuf.mjs
new file mode 100644
index 0000000..888705c
--- /dev/null
+++ b/docs/lib/fast-dict-onebuf.mjs
@@ -0,0 +1,546 @@
+// One-buffer PDFDict: every committed entry lives in a single
+// append-only array (main), kept for the document's lifetime.
The +// parser uses a small per-instance temp array as a stack of recursion +// frames; each parseDict invocation appends to temp, commits its +// frame to main in one contiguous range, and pops temp back. After +// parseDocument completes, temp is released. PDFDict instances only +// ever read from main, so the bufIdx field disappears from the +// packed value -- frees up bits. +// +// 41-bit packed Number layout (well within Number.MAX_SAFE_INTEGER): +// bits 0-22: start (23 bits, max 8.4 M slots in main; mainLen ~2.3 M today) +// bit 23: PDFPageLeaf `normalized` flag (zero on all other dict subtypes) +// bit 24: PDFPageLeaf `autoNormalizeCTM` flag (zero on all other dict subtypes) +// bits 25-40: length (16 bits, max 65 535 slots; max observed 8 706) +// bits 41-52: spare (12 bits; unused, available headroom) +// +// V8 Smi (31-bit signed) covers values < 2^30. start + length*2^25 stays +// Smi iff length < 32 (the 2^30 boundary). Beyond that, `d` boxes to a +// HeapNumber but bit math via `& MASK_*` and `+`/`-` continues to work -- +// reads still extract bits 0..30 correctly via Int32 coercion, writes +// use arithmetic so high bits survive. +// +// PDFPageLeaf collapses to the same single-`d` field as plain PDFDict; +// `normalized` and `autoNormalizeCTM` are getters/setters that mask +// in/out of `d`'s bits 23 and 24. Heap floor matches `_FastDict` (no +// separate boolean property slots). +// +// Recursion. Outer parseDict pushes entries onto temp. Calling +// this.parseObject() to parse a value may recurse to inner +// parseDict, which appends ON TOP of outer's pending entries. Inner +// commits its frame to main in one append, then pops temp back to +// the level it started at -- outer's frame is intact at the top of +// temp again. Outer continues, eventually committing its (now +// contiguous in temp) entries to main in one append. 
Outer's and +// inner's ranges in main do not overlap; each was committed as a +// single contiguous block at distinct points in time. +// +// Mutations: +// - set with existing key: in-place replace (safe; no shifts) +// - set with new key, dict at main's high-water mark: in-place +// push (extend the range) +// - set with new key, dict NOT at high-water mark: COW (copy +// range to main's tail, then push the new pair, update encoded +// value to the new range) +// - delete: COW (copy range minus deleted entry to tail) +// The at-HWM check fully determines whether extending is safe; +// each dict's range is unique to that dict (no slot sharing), so +// extending past the dict's end at HWM never disturbs anything. +// An earlier design tracked an owned/shared bit to gate this; it +// was redundant -- shared dicts at HWM extend just as safely as +// owned ones. +// +// Singleton PDFContext (one PDFDocument.load per process in our +// pipeline; throws if a second distinct context appears). +// +// Mutually exclusive with --fast-dict-double / --fast-dict-view / +// --fast-dict-array. 
+ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const PDFDict = require('pdf-lib/cjs/core/objects/PDFDict.js').default; +const PDFCatalog = require('pdf-lib/cjs/core/structures/PDFCatalog.js').default; +const PDFPageTree = require('pdf-lib/cjs/core/structures/PDFPageTree.js').default; +const PDFPageLeaf = require('pdf-lib/cjs/core/structures/PDFPageLeaf.js').default; +const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default; +const PDFNull = require('pdf-lib/cjs/core/objects/PDFNull.js').default; +const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default; +const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default; + +const TypeName = PDFName.of('Type'); +const CatalogName = PDFName.of('Catalog'); +const PagesName = PDFName.of('Pages'); +const PageName = PDFName.of('Page'); + +// ---- The single buffer + temp --------------------------------------- + +// Pre-sized to total entries + slack measured on the book. Other +// workloads grow it naturally (V8-amortized array growth from this +// starting size). When the measure-pass shim runs first, it calls +// setExpectedDictSlots() before parse, which resizes `main` to exact +// measured demand via `main.length = N`. +const MAIN_INITIAL_CAP = 2400000; +const main = new Array(MAIN_INITIAL_CAP); +let mainLen = 0; + +// Exposed for measurement-only consumers (perf/instrument-*.mjs). +// The encoded `d` values held by PDFDict instances reference main by +// (start, length); reading the slots requires access to main itself. +export { main }; +export function getMainLen() { return mainLen; } + +// Resize `main` in place to the expected slot count. Must be called +// before any parseDict / withContext / fromMapWithContext (i.e. while +// mainLen is still 0). `slack` is a multiplier on `slots`; default 1.0 +// (exact). Use a small slack only if the measure pass is approximate.
+export function setExpectedDictSlots(slots, slack = 1.0) { + if (mainLen > 0) { + throw new Error( + `fast-dict-onebuf: setExpectedDictSlots called after parse started (mainLen=${mainLen})`, + ); + } + const sized = Math.ceil(slots * slack); + // Resize in place rather than reassigning. Reassigning the module- + // level `main` binding invalidates V8's inline-cache slots in every + // closure that reads it -- the closures get deopted on first call + // and recompile against the new array, with a parse-time allocation + // spike attributed to _appendEntries (~27 MB sampled on the book). + // `main.length = N` keeps the same Array identity; ICs stay valid. + main.length = sized; +} + +// ---- Bit-packing helpers -------------------------------------------- + +const POW_23 = 1 << 23; // 8 388 608 -- gap-bit base / start ceiling +const POW_25 = 1 << 25; // 33 554 432 -- length multiplier +const MASK_23 = 0x7FFFFF; // 23-bit start mask +const MASK_16 = 0xFFFF; // 16-bit length mask + +const NORM_BIT = POW_23; // bit 23: PDFPageLeaf `normalized` +const AUTO_BIT = POW_23 * 2; // bit 24: PDFPageLeaf `autoNormalizeCTM` +const GAP_MASK = NORM_BIT | AUTO_BIT; + +const MAX_START = POW_23; // exclusive +const MAX_LENGTH = 1 << 16; // 65536, exclusive + +function pack(start, length) { + if (start >= MAX_START) throw new Error(`fast-dict-onebuf: start ${start} exceeds 23-bit budget`); + if (length >= MAX_LENGTH) throw new Error(`fast-dict-onebuf: length ${length} exceeds 16-bit budget`); + return start + length * POW_25; +} + +// Read start (bits 0-22) and length (bits 25-40). Both work on +// HeapNumber'd d: `& MASK_23` lives in low 32 bits (Int32 coercion +// reads it correctly); `Math.floor(d / POW_25)` operates on the full +// Number range before the `& MASK_16` truncates. 
+function _start(d) { return d & MASK_23; } +function _length(d) { return Math.floor(d / POW_25) & MASK_16; } + +// ---- Singleton context --------------------------------------------- + +let _singletonContext = null; + +function _registerContext(ctx) { + if (_singletonContext === null) { + _singletonContext = ctx; + } else if (_singletonContext !== ctx) { + throw new Error('fast-dict-onebuf: expected a singleton PDFContext, got a second distinct one.'); + } +} + +// ---- Append helpers ------------------------------------------------ + +function _appendEntries(entries, fromOffset, lenSlots) { + for (let i = 0; i < lenSlots; i++) { + main[mainLen + i] = entries[fromOffset + i]; + } + mainLen += lenSlots; +} + +function _appendArray(arr) { + const len = arr.length; + for (let i = 0; i < len; i++) main[mainLen + i] = arr[i]; + mainLen += len; +} + +// COW: copy this dict's range to main's tail, return the new packed +// value anchored at the new range. If we're already at the HWM, +// nothing to copy -- return d unchanged. +// +// Gap bits (bits 23-24, used by PDFPageLeaf for normalized / +// autoNormalizeCTM) are preserved across the repack. For non-PageLeaf +// dicts the mask is zero, so `+ (d & GAP_MASK)` is a no-op. Addition +// is used instead of `|` so the high bits of HeapNumber'd d survive. +function _cow(pd) { + const d = pd.d; + const start = _start(d); + const length = _length(d); + if (start + length === mainLen) return d; // at HWM, extend in place + const newStart = mainLen; + for (let i = 0; i < length; i++) main[mainLen + i] = main[start + i]; + mainLen += length; + return pack(newStart, length) + (d & GAP_MASK); +} + +// ---- Construction --------------------------------------------------- +// +// Use plain-function constructors with the prototype aliased to the +// upstream PDFDict / PDFCatalog / PDFPageTree / PDFPageLeaf prototypes +// instead of `Object.create(proto) + property writes`. 
V8 gives +// `new`-built instances a stable hidden class derived from the +// assignment order in the constructor body, and per-instance heap cost +// drops materially vs the slow-property path taken by Object.create + +// later writes (the same shape change that fast-refs-class made for +// PDFRef: ~60 B/instance -> ~44 B). For the 260 k+ dicts on the book +// the per-instance gap × instance count is the dominant remaining heap +// row. +// +// One constructor per subclass so V8 sees a single fixed shape per +// kind. PDFPageLeaf collapses to the same single-`d` shape as plain +// PDFDict; `normalized` defaults to false (gap bit 23 clear) and +// `autoNormalizeCTM` defaults to true (gap bit 24 set) -- the bit +// is OR'd in by the constructor below via addition (so HeapNumber'd +// d doesn't lose high bits to Int32 coercion). Both flags become +// prototype getters/setters that mask in/out of bits 23-24. +// Any unknown PDFDict subclass falls back to the original +// Object.create path so the shim doesn't crash on downstream +// extensions (none in our pipeline; defensive only). + +function _FastDict(d) { this.d = d; } +_FastDict.prototype = PDFDict.prototype; + +function _FastCatalog(d) { this.d = d; } +_FastCatalog.prototype = PDFCatalog.prototype; + +function _FastPageTree(d) { this.d = d; } +_FastPageTree.prototype = PDFPageTree.prototype; + +// d arrives from pack(start, length) so bits 23-24 are zero; +// `+ AUTO_BIT` sets bit 24 unconditionally (autoNormalizeCTM = true +// default). Use addition not `|`: if length >= 32, d > 2^30 (HeapNumber) +// and `|` would truncate to Int32 losing high bits. 
+function _FastPageLeaf(d) { this.d = d + AUTO_BIT; } +_FastPageLeaf.prototype = PDFPageLeaf.prototype; + +function _makeFromRange(ProtoClass, start, length, ctx) { + _registerContext(ctx); + const d = pack(start, length); + if (ProtoClass === PDFDict) return new _FastDict(d); + if (ProtoClass === PDFPageLeaf) return new _FastPageLeaf(d); + if (ProtoClass === PDFCatalog) return new _FastCatalog(d); + if (ProtoClass === PDFPageTree) return new _FastPageTree(d); + // Defensive fallback for any unknown subclass. + const pd = Object.create(ProtoClass.prototype); + pd.d = d; + return pd; +} + +function _makeFromAppend(ProtoClass, arr, ctx) { + const start = mainLen; + _appendArray(arr); + return _makeFromRange(ProtoClass, start, arr.length, ctx); +} + +function mapToArray(map) { + const arr = new Array(map.size * 2); + let i = 0; + for (const [k, v] of map) { arr[i++] = k; arr[i++] = v; } + return arr; +} + +if (!PDFDict.prototype.__fastDictOnebufInstalled) { + + // ---- PDFDict.prototype -------------------------------------------- + + PDFDict.prototype.keys = function () { + const d = this.d; + const start = _start(d); + const length = _length(d); + const out = new Array(length >> 1); + for (let i = 0, j = 0; i < length; i += 2, j++) out[j] = main[start + i]; + return out; + }; + + PDFDict.prototype.values = function () { + const d = this.d; + const start = _start(d); + const length = _length(d); + const out = new Array(length >> 1); + for (let i = 0, j = 0; i < length; i += 2, j++) out[j] = main[start + i + 1]; + return out; + }; + + PDFDict.prototype.entries = function () { + const d = this.d; + const start = _start(d); + const length = _length(d); + const out = new Array(length >> 1); + for (let i = 0, j = 0; i < length; i += 2, j++) { + out[j] = [main[start + i], main[start + i + 1]]; + } + return out; + }; + + PDFDict.prototype.set = function (key, value) { + const d0 = this.d; + const start0 = _start(d0); + const length0 = _length(d0); + // Try in-place replace 
+ for (let i = 0; i < length0; i += 2) { + if (main[start0 + i] === key) { main[start0 + i + 1] = value; return; } + } + // Append: requires the dict to be at main's high-water mark, OR we COW. + let dNow = d0; + if (start0 + length0 !== mainLen) { + dNow = _cow(this); + } + // After _cow (or if we were already at HWM), we abut the tail. + main[mainLen++] = key; + main[mainLen++] = value; + const start = _start(dNow); + // Preserve gap bits (PageLeaf flags) from dNow into the freshly + // packed value. Zero for non-PageLeaf dicts. + this.d = pack(start, length0 + 2) + (dNow & GAP_MASK); + }; + + PDFDict.prototype.get = function (key, preservePDFNull) { + if (preservePDFNull === undefined) preservePDFNull = false; + const d = this.d; + const start = _start(d); + const end = start + _length(d); + for (let i = start; i < end; i += 2) { + if (main[i] === key) { + const value = main[i + 1]; + if (value === PDFNull && !preservePDFNull) return undefined; + return value; + } + } + return undefined; + }; + + PDFDict.prototype.has = function (key) { + const d = this.d; + const start = _start(d); + const end = start + _length(d); + for (let i = start; i < end; i += 2) { + if (main[i] === key) { + const value = main[i + 1]; + return value !== undefined && value !== PDFNull; + } + } + return false; + }; + + PDFDict.prototype.delete = function (key) { + // Always COW for delete: shifting slots in main would corrupt + // other dicts that point into the affected region. + const d0 = this.d; + const start0 = _start(d0); + const length0 = _length(d0); + let foundIdx = -1; + for (let i = 0; i < length0; i += 2) { + if (main[start0 + i] === key) { foundIdx = i; break; } + } + if (foundIdx < 0) return false; + const newStart = mainLen; + for (let i = 0; i < length0; i++) { + if (i === foundIdx || i === foundIdx + 1) continue; + main[mainLen++] = main[start0 + i]; + } + // Preserve gap bits (PageLeaf flags); zero for non-PageLeaf dicts. 
+ this.d = pack(newStart, length0 - 2) + (d0 & GAP_MASK); + return true; + }; + + PDFDict.prototype.asMap = function () { + const d = this.d; + const start = _start(d); + const end = start + _length(d); + const m = new Map(); + for (let i = start; i < end; i += 2) m.set(main[i], main[i + 1]); + return m; + }; + + PDFDict.prototype.clone = function (context) { + const d = this.d; + const start = _start(d); + const length = _length(d); + const newStart = mainLen; + for (let i = 0; i < length; i++) main[mainLen + i] = main[start + i]; + mainLen += length; + _registerContext(context || _singletonContext); + return new _FastDict(pack(newStart, length)); + }; + + PDFDict.prototype.toString = function () { + const d = this.d; + const start = _start(d); + const end = start + _length(d); + let s = '<<\n'; + for (let i = start; i < end; i += 2) { + s += main[i].toString() + ' ' + main[i + 1].toString() + '\n'; + } + return s + '>>'; + }; + + PDFDict.prototype.sizeInBytes = function () { + const d = this.d; + const start = _start(d); + const end = start + _length(d); + let size = 5; + for (let i = start; i < end; i += 2) { + size += main[i].sizeInBytes() + main[i + 1].sizeInBytes() + 2; + } + return size; + }; + + PDFDict.prototype.copyBytesInto = function (buffer, offset) { + const initialOffset = offset; + buffer[offset++] = CharCodes.LessThan; + buffer[offset++] = CharCodes.LessThan; + buffer[offset++] = CharCodes.Newline; + const d = this.d; + const start = _start(d); + const end = start + _length(d); + for (let i = start; i < end; i += 2) { + offset += main[i].copyBytesInto(buffer, offset); + buffer[offset++] = CharCodes.Space; + offset += main[i + 1].copyBytesInto(buffer, offset); + buffer[offset++] = CharCodes.Newline; + } + buffer[offset++] = CharCodes.GreaterThan; + buffer[offset++] = CharCodes.GreaterThan; + return offset - initialOffset; + }; + + Object.defineProperty(PDFDict.prototype, 'context', { + get() { return _singletonContext; }, + set(_ctx) { /* singleton 
is source of truth */ }, + configurable: true, + }); + + // ---- PDFPageLeaf flag accessors ----------------------------------- + // + // `normalized` and `autoNormalizeCTM` live in bits 23 and 24 of + // `d`. Reads use `& BIT` -- safe on HeapNumber'd d because both + // bits are in the low 32 (Int32 coercion reads them correctly). + // Writes use arithmetic (`d + BIT` / `d - BIT`) gated on the + // current bit state, so high bits of HeapNumber'd d survive. + // No-ops when the flag is already in the requested state. + + Object.defineProperty(PDFPageLeaf.prototype, 'normalized', { + get() { return (this.d & NORM_BIT) !== 0; }, + set(v) { + const d = this.d; + const has = (d & NORM_BIT) !== 0; + if (v && !has) this.d = d + NORM_BIT; + else if (!v && has) this.d = d - NORM_BIT; + }, + configurable: true, + }); + + Object.defineProperty(PDFPageLeaf.prototype, 'autoNormalizeCTM', { + get() { return (this.d & AUTO_BIT) !== 0; }, + set(v) { + const d = this.d; + const has = (d & AUTO_BIT) !== 0; + if (v && !has) this.d = d + AUTO_BIT; + else if (!v && has) this.d = d - AUTO_BIT; + }, + configurable: true, + }); + + // ---- PDFDict factories -------------------------------------------- + + PDFDict.withContext = function (context) { + return _makeFromAppend(PDFDict, [], context); + }; + PDFDict.fromMapWithContext = function (map, context) { + return _makeFromAppend(PDFDict, mapToArray(map), context); + }; + + PDFCatalog.withContextAndPages = function (context, pages) { + return _makeFromAppend( + PDFCatalog, + [PDFName.of('Type'), CatalogName, PagesName, pages], + context, + ); + }; + PDFCatalog.fromMapWithContext = function (map, context) { + return _makeFromAppend(PDFCatalog, mapToArray(map), context); + }; + + PDFPageTree.fromMapWithContext = function (map, context) { + return _makeFromAppend(PDFPageTree, mapToArray(map), context); + }; + + PDFPageLeaf.fromMapWithContext = function (map, context, autoNormalizeCTM) { + const d = _makeFromAppend(PDFPageLeaf, 
mapToArray(map), context); + if (autoNormalizeCTM !== undefined) d.autoNormalizeCTM = autoNormalizeCTM; + return d; + }; + + // ---- PDFObjectParser.prototype.parseDict -------------------------- + // + // Each parser instance carries its own temp array (small; sized to + // peak recursion-depth-stack of entries) plus a length cursor. + // parseDict pushes entries onto temp's tail; on completion, commits + // its frame to main in one contiguous append, pops temp back to + // frameStart, and returns a PDFDict view into main. + + PDFObjectParser.prototype.parseDict = function fastParseDictOneBuf() { + const bytes = this.bytes; + bytes.assertNext(CharCodes.LessThan); + bytes.assertNext(CharCodes.LessThan); + this.skipWhitespaceAndComments(); + + if (this._dictTemp === undefined) { + this._dictTemp = new Array(64); // grows naturally if needed + this._dictTempLen = 0; + } + const temp = this._dictTemp; + const frameStart = this._dictTempLen; + + while (!bytes.done() && + bytes.peek() !== CharCodes.GreaterThan && + bytes.peekAhead(1) !== CharCodes.GreaterThan) { + const key = this.parseName(); + const value = this.parseObject(); // may recurse; temp grows / shrinks + const len = this._dictTempLen; + temp[len] = key; + temp[len + 1] = value; + this._dictTempLen = len + 2; + this.skipWhitespaceAndComments(); + } + this.skipWhitespaceAndComments(); + bytes.assertNext(CharCodes.GreaterThan); + bytes.assertNext(CharCodes.GreaterThan); + + const frameLen = this._dictTempLen - frameStart; + // Commit this frame to main in one contiguous append + const start = mainLen; + _appendEntries(temp, frameStart, frameLen); + // Pop our frame off temp + this._dictTempLen = frameStart; + + // Type-sentinel dispatch (scan the frame we just committed) + let Type; + const end = start + frameLen; + for (let i = start; i < end; i += 2) { + if (main[i] === TypeName) { Type = main[i + 1]; break; } + } + if (Type === CatalogName) return _makeFromRange(PDFCatalog, start, frameLen, this.context); + 
if (Type === PagesName) return _makeFromRange(PDFPageTree, start, frameLen, this.context); + if (Type === PageName) return _makeFromRange(PDFPageLeaf, start, frameLen, this.context); + return _makeFromRange(PDFDict, start, frameLen, this.context); + }; + + PDFDict.prototype.__fastDictOnebufInstalled = true; + // Mark subsumed shims as installed. + PDFDict.prototype.__fastDictDoubleInstalled = true; + PDFDict.prototype.__fastDictViewInstalled = true; + PDFDict.prototype.__fastDictArrayInstalled = true; + PDFDict.prototype.__fastDictIterInstalled = true; + PDFObjectParser.prototype.__fastParseDictInstalled = true; +} diff --git a/docs/lib/fast-indirect-objects.mjs b/docs/lib/fast-indirect-objects.mjs new file mode 100644 index 0000000..9058414 --- /dev/null +++ b/docs/lib/fast-indirect-objects.mjs @@ -0,0 +1,174 @@ +// Replace PDFContext.indirectObjects (Map) with a +// dense array keyed by objectNumber for the gen=0 path. +// +// Motivation. After fast-dict-array shipped, the only remaining hot +// Map.set in the process-phase heap profile was +// PDFContext.assign's `this.indirectObjects.set(ref, object)`: +// +// $ node find-heap-callers.mjs .heapprofile set +// set: total=14.49 MB +// 7168.04 KB PDFParser.parseIndirectObjectHeader +// 7168.04 KB parseIndirectObjectSync @ fast-sync-load.mjs:140 +// ... +// +// (Both ~7 MB rows are V8 inline-attribution duplicates of the same +// logical call.) That's 14.5 MB of Map traffic for one Map -- one +// `set` per indirect object during load, with the hash table +// rebuilding through ~14 doubling steps to fit the book's ~9 k +// indirect objects, discarding each intermediate arena to GC. +// +// PDFRefs are overwhelmingly gen=0 (revisions / incremental updates +// are the only gen!=0 producers, and they're rare). fast-refs.mjs +// already exploits this on the key side -- a dense array indexed by +// objectNumber for the PDFRef pool, Map fallback for gen!=0. 
This +// shim does the same on the value side for PDFContext.indirectObjects. +// +// Mechanism. Patch PDFContext.prototype.assign / lookup / lookupMaybe +// / delete / getObjectRef / enumerateIndirectObjects to consult an +// auxiliary `this._objArr` (dense array indexed by objectNumber) for +// gen=0 PDFRefs first, falling back to the original Map for gen!=0. +// The dense array is created lazily on first assign so we don't need +// to touch the constructor. +// +// The original `this.indirectObjects` Map is left in place for two +// reasons: (a) gen!=0 entries actually need it, and (b) external code +// that reads `pdfContext.indirectObjects` directly (none in our +// pipeline, but reasonable to defensive-preserve) continues to see a +// Map-shaped object -- just usually empty. +// +// As a side benefit, `enumerateIndirectObjects` no longer needs to +// sort: dense-array iteration is already in ascending objectNumber +// order. (The Map-sourced gen!=0 entries are merged in sorted.) +// +// Side-effecting import. Import once before any PDFDocument.load: +// +// import "./lib/fast-indirect-objects.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. + +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const PDFContext = require('pdf-lib/cjs/core/PDFContext.js').default; +const PDFRef = require('pdf-lib/cjs/core/objects/PDFRef.js').default; +const PDFNull = require('pdf-lib/cjs/core/objects/PDFNull.js').default; +const UnexpectedObjectTypeError = require('pdf-lib/cjs/core/errors.js').UnexpectedObjectTypeError; + +const byAscendingObjectNumber = ([a], [b]) => a.objectNumber - b.objectNumber; + +if (!PDFContext.prototype.__fastIndirectObjectsInstalled) { + + // ---- assign ------------------------------------------------------- + // Hot path. gen=0 → dense array store; gen!=0 → Map. Maintains + // largestObjectNumber as before. 
+ + PDFContext.prototype.assign = function (ref, object) { + if (ref.generationNumber === 0) { + if (!this._objArr) this._objArr = []; + this._objArr[ref.objectNumber] = object; + } else { + this.indirectObjects.set(ref, object); + } + if (ref.objectNumber > this.largestObjectNumber) { + this.largestObjectNumber = ref.objectNumber; + } + }; + + // ---- delete ------------------------------------------------------- + // Returns true iff something was removed. Dense slots are reset to + // `undefined` (not spliced) so subsequent objectNumbers keep theirs. + + PDFContext.prototype.delete = function (ref) { + if (ref.generationNumber === 0 && this._objArr) { + const slot = this._objArr[ref.objectNumber]; + if (slot !== undefined) { + this._objArr[ref.objectNumber] = undefined; + return true; + } + return false; + } + return this.indirectObjects.delete(ref); + }; + + // ---- lookup / lookupMaybe ----------------------------------------- + // Resolve the ref to an object via the dense array (gen=0) or Map + // (gen!=0), then run the original type-check tail verbatim.
+ + function _resolve(ctx, ref) { + if (!(ref instanceof PDFRef)) return ref; + if (ref.generationNumber === 0 && ctx._objArr) { + return ctx._objArr[ref.objectNumber]; + } + return ctx.indirectObjects.get(ref); + } + + PDFContext.prototype.lookupMaybe = function (ref) { + const types = []; + for (let i = 1, len = arguments.length; i < len; i++) types[i - 1] = arguments[i]; + const preservePDFNull = types.includes(PDFNull); + const result = _resolve(this, ref); + if (!result || (result === PDFNull && !preservePDFNull)) return undefined; + for (let idx = 0, len = types.length; idx < len; idx++) { + const type = types[idx]; + if (type === PDFNull) { + if (result === PDFNull) return result; + } else { + if (result instanceof type) return result; + } + } + throw new UnexpectedObjectTypeError(types, result); + }; + + PDFContext.prototype.lookup = function (ref) { + const types = []; + for (let i = 1, len = arguments.length; i < len; i++) types[i - 1] = arguments[i]; + const result = _resolve(this, ref); + if (types.length === 0) return result; + for (let idx = 0, len = types.length; idx < len; idx++) { + const type = types[idx]; + if (type === PDFNull) { + if (result === PDFNull) return result; + } else { + if (result instanceof type) return result; + } + } + throw new UnexpectedObjectTypeError(types, result); + }; + + // ---- getObjectRef ------------------------------------------------- + // Linear scan. Dense array first (gen=0 PDFRef reconstructed from + // objectNumber via PDFRef.of, which fast-refs has cached). Fall + // back to Map for any gen!=0 candidates. 
+ + PDFContext.prototype.getObjectRef = function (pdfObject) { + if (this._objArr) { + for (let i = 0, len = this._objArr.length; i < len; i++) { + if (this._objArr[i] === pdfObject) return PDFRef.of(i, 0); + } + } + for (const entry of this.indirectObjects) { + if (entry[1] === pdfObject) return entry[0]; + } + return undefined; + }; + + // ---- enumerateIndirectObjects ------------------------------------- + // Dense array is already iterable in objectNumber order. Merge in + // any gen!=0 entries from the Map and sort once -- but only if the + // Map is non-empty (the common case for parsed PDFs is empty). + + PDFContext.prototype.enumerateIndirectObjects = function () { + const out = []; + if (this._objArr) { + for (let i = 0, len = this._objArr.length; i < len; i++) { + const obj = this._objArr[i]; + if (obj !== undefined) out.push([PDFRef.of(i, 0), obj]); + } + } + if (this.indirectObjects.size === 0) return out; + for (const entry of this.indirectObjects) out.push(entry); + return out.sort(byAscendingObjectNumber); + }; + + PDFContext.prototype.__fastIndirectObjectsInstalled = true; +} diff --git a/docs/lib/fast-inflate.mjs b/docs/lib/fast-inflate.mjs new file mode 100644 index 0000000..db675d5 --- /dev/null +++ b/docs/lib/fast-inflate.mjs @@ -0,0 +1,39 @@ +// Replace pako's pure-JS inflate with Node's zlib for the one path +// pdf-lib actually uses it on: PDFCrossRefStreamParser inflating the +// compressed cross-reference stream during PDFDocument.load. Exactly +// one call per load on Chrome-emitted PDFs (PDF 1.5+ xref-stream +// format), ~4.5 KB input. Negligible wall-clock, but it's the last +// remaining pdf-lib -> pako call site once parallelSave has taken +// over the deflate side -- this brings the runtime pako call count +// to zero. +// +// PDF /FlateDecode (ISO 32000-1 §7.4.4) is the zlib format (RFC 1950): +// 2-byte zlib header + raw deflate body (RFC 1951) + 4-byte Adler-32 +// trailer. 
Both pako.inflate and zlib.inflateSync consume that +// format, so the swap is wire-compatible. +// +// Mechanism: pdf-lib is CJS in node_modules and calls +// `require("pako").inflate(...)` at the call site, not at import +// time. Mutating the live pako exports object is enough; no fork +// required. +// +// Side-effecting import. Import once before PDFDocument.load runs: +// +// import "./lib/fast-inflate.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. + +import { inflateSync } from "node:zlib"; +import pako from "pako"; + +if (!pako.__fastInflateInstalled) { + const original = pako.inflate; + pako.inflate = function fastInflate(data, options) { + // pdf-lib's only caller passes no options. Anything fancier + // (dictionary, raw, custom windowBits) goes back to pako so we + // don't change behaviour outside the one path we care about. + if (options) return original.call(pako, data, options); + return inflateSync(data); + }; + pako.__fastInflateInstalled = true; +} diff --git a/docs/lib/fast-number-to-string.mjs b/docs/lib/fast-number-to-string.mjs new file mode 100644 index 0000000..57640a9 --- /dev/null +++ b/docs/lib/fast-number-to-string.mjs @@ -0,0 +1,65 @@ +// Skip pdf-lib's numberToString redundant work when the input doesn't +// stringify to exponential notation. +// +// The upstream numberToString +// ([numbers.js:13](node_modules/pdf-lib/cjs/utils/numbers.js:13)) is: +// +// exports.numberToString = function (num) { +// var numStr = String(num); +// if (Math.abs(num) < 1.0) { +// var e = parseInt(num.toString().split('e-')[1]); +// if (e) { ... } +// } else { +// var e = parseInt(num.toString().split('+')[1]); +// if (e > 20) { ... } +// } +// return numStr; +// }; +// +// It always computes `numStr = String(num)` up front -- but then +// re-calls `num.toString()`, allocates a `.split(...)` array, and +// runs parseInt on the result, even though `numStr` is already what +// `.toString()` returns. 
Exponential notation in `String(num)` only +// appears for |num| < 1e-6 or |num| >= 1e21, neither of which real +// PDFs emit: object refs, generations, byte offsets, content-stream +// coordinates, /Size, /Length, etc. all stringify to plain decimal. +// +// Shim: short-circuit when `String(num)` contains no `'e'` and return +// it immediately. The rare exponential cases fall through to the +// original so the spec-compliant expansion logic is preserved. +// +// Why three patches and not one: pdf-lib ships compiled against +// tslib 1.x, whose `__exportStar` does a value-copy (`exports[p] = +// m[p]`) rather than installing a live getter. So by the time +// PDFNumber.js's `index_1.numberToString(value)` runs, `index_1` (the +// utils/index barrel) holds a captured reference to the original +// function, and mutating `numbers.numberToString` alone is invisible +// to the call site. We patch the captured copies along the re-export +// chain: utils/numbers (source), utils/index (the barrel PDFNumber +// reads from), and pdf-lib's top-level index (the public surface). +// +// Side-effecting import. Import once before any pdf-lib operation: +// +// import "./lib/fast-number-to-string.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. 
+ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const numbers = require('pdf-lib/cjs/utils/numbers.js'); +const utilsBarrel = require('pdf-lib/cjs/utils/index.js'); +const topBarrel = require('pdf-lib/cjs/index.js'); + +if (!numbers.__fastNumberToStringInstalled) { + const original = numbers.numberToString; + const fastNumberToString = function fastNumberToString(num) { + const numStr = String(num); + if (numStr.indexOf('e') === -1) return numStr; + return original(num); + }; + numbers.numberToString = fastNumberToString; + utilsBarrel.numberToString = fastNumberToString; + topBarrel.numberToString = fastNumberToString; + numbers.__fastNumberToStringInstalled = true; +} diff --git a/docs/lib/fast-parse-dict.mjs b/docs/lib/fast-parse-dict.mjs new file mode 100644 index 0000000..203549c --- /dev/null +++ b/docs/lib/fast-parse-dict.mjs @@ -0,0 +1,87 @@ +// Hoist the four sentinel PDFName.of calls out of +// PDFObjectParser.prototype.parseDict. +// +// The upstream parseDict +// ([PDFObjectParser.js:141](node_modules/pdf-lib/cjs/core/parser/PDFObjectParser.js:141)) +// ends every dict it parses with a Type-dispatch tail: +// +// var Type = dict.get(PDFName.of('Type')); +// if (Type === PDFName.of('Catalog')) return PDFCatalog.fromMapWithContext(...); +// else if (Type === PDFName.of('Pages')) return PDFPageTree.fromMapWithContext(...); +// else if (Type === PDFName.of('Page')) return PDFPageLeaf.fromMapWithContext(...); +// else return PDFDict.fromMapWithContext(...); +// +// That's 4 PDFName.of calls per dict, even on the overwhelming +// majority (resource dicts, font descriptors, content-stream dicts) +// that have no /Type entry at all. With --fast-decode-name in +// effect each call collapses to a Map.get on fastCache, but +// fastOf is still the #4 row in process.cpuprofile (~80 ms, +// 5.2 %). 
+// +// PDFName instances are pool-deduped +// ([PDFName.js:18,100](node_modules/pdf-lib/cjs/core/objects/PDFName.js:18)) +// so the sentinel "Type" / "Catalog" / "Pages" / "Page" PDFNames +// are reference-stable for the entire load. Capture them once at +// shim-load time and substitute direct constants for the four +// PDFName.of calls inside parseDict. The rest of the function +// body is preserved verbatim -- same loop, same dict.set, same +// dispatch shape. +// +// Mechanism: PDFObjectParser isn't re-exported by pdf-lib's index, +// so we reach in through the CJS internals via createRequire (same +// shape as fast-parse-number.mjs / fast-dict-iter.mjs). Mutating +// PDFObjectParser.prototype.parseDict is global -- every parser +// instance created after this shim loads picks it up. +// +// Side-effecting import. Import once before PDFDocument.load runs: +// +// import "./lib/fast-parse-dict.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. + +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default; +const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default; +const PDFDict = require('pdf-lib/cjs/core/objects/PDFDict.js').default; +const PDFCatalog = require('pdf-lib/cjs/core/structures/PDFCatalog.js').default; +const PDFPageTree = require('pdf-lib/cjs/core/structures/PDFPageTree.js').default; +const PDFPageLeaf = require('pdf-lib/cjs/core/structures/PDFPageLeaf.js').default; +const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default; + +// Capture canonical PDFName instances. Pool-dedup guarantees the +// parser would have built === these even if the original parseDict +// were still in play. 
+const [TypeName, CatalogName, PagesName, PageName] =
+  ['Type', 'Catalog', 'Pages', 'Page'].map((label) => PDFName.of(label));
+
+if (!PDFObjectParser.prototype.__fastParseDictInstalled) {
+  // Delimiter bytes for `<<` and `>>`, read once at install time.
+  const OPEN = CharCodes.LessThan;
+  const CLOSE = CharCodes.GreaterThan;
+
+  // Drop-in parseDict: same byte-level walk as before, but the /Type
+  // dispatch compares against the PDFName instances captured above
+  // instead of calling PDFName.of four times per dict.
+  PDFObjectParser.prototype.parseDict = function fastParseDict() {
+    const stream = this.bytes;
+    stream.assertNext(OPEN);
+    stream.assertNext(OPEN);
+    this.skipWhitespaceAndComments();
+
+    const entries = new Map();
+    for (;;) {
+      // Stop at end of input, or when either of the next two bytes is `>`.
+      if (stream.done()) break;
+      if (stream.peek() === CLOSE) break;
+      if (stream.peekAhead(1) === CLOSE) break;
+
+      // Key is parsed before the value; both advance the stream.
+      const key = this.parseName();
+      entries.set(key, this.parseObject());
+      this.skipWhitespaceAndComments();
+    }
+
+    this.skipWhitespaceAndComments();
+    stream.assertNext(CLOSE);
+    stream.assertNext(CLOSE);
+
+    // Identity comparison against the captured sentinels picks the wrapper.
+    switch (entries.get(TypeName)) {
+      case CatalogName:
+        return PDFCatalog.fromMapWithContext(entries, this.context);
+      case PagesName:
+        return PDFPageTree.fromMapWithContext(entries, this.context);
+      case PageName:
+        return PDFPageLeaf.fromMapWithContext(entries, this.context);
+      default:
+        return PDFDict.fromMapWithContext(entries, this.context);
+    }
+  };
+
+  PDFObjectParser.prototype.__fastParseDictInstalled = true;
+}
diff --git a/docs/lib/fast-parse-name.mjs b/docs/lib/fast-parse-name.mjs
new file mode 100644
index 0000000..5da62fa
--- /dev/null
+++ b/docs/lib/fast-parse-name.mjs
@@ -0,0 +1,146 @@
+// Byte-keyed cache in front of parseName: on cache hit (99.7 % of
+// calls on the book) return the existing PDFName without allocating
+// the lookup string at all.
+//
+// Step 1 of this optimisation (commit history shows the failed
+// attempt) hand-inlined parseName's byte loop to skip the
+// `this.bytes.peek() / .next() / .done()` per-byte method dispatch
+// while keeping the original cons-string accumulator.
CPU didn't move: +// V8 was already optimising the cons-string path well, and the saved +// method-call cost just shifted attribution to the callers +// (fastParseDictOneBuf / fastParseObject). Heap was flat too. +// +// This shim attacks the actual transient cost: each call builds a +// throwaway string (cons-chain of ~8 chars on average, then flattened +// on first use) only to hand it to PDFName.of, which hashes the string +// against a Map and returns the cached instance. +// 1.68 M calls × ~10-byte average × cons-string allocations + Map.get +// hashing-the-string-again adds up to non-trivial heap throughput and +// CPU even though the per-call work is small. +// +// PDF names are 4 787 unique on the book vs 1 681 225 calls -- 99.7 % +// hit rate. So 99.7 % of those string allocations + Map hashings are +// pure overhead: the answer was already computed, we just needed a +// way to find it without rebuilding the key. +// +// The byte-cache. Keyed by `Uint8Array.prototype.hash`-ish value +// (Java-style `hash * 31 + byte`), valued by the cached PDFName. +// Each bucket stores `Entry` (single-entry, the common case for ~99 % +// of buckets) or `Entry[]` (collision, vanishingly rare for the 4.8 k +// unique names hashed into 2^32 space). Entry holds the bytes-key +// (a small Uint8Array copy of the name body) for collision-check +// equality. +// +// Cold path. On byte-cache miss, build the string via +// `String.fromCharCode` (one allocation, not the per-byte cons chain +// because we already have the full byte range from the scan) and +// call the upstream `PDFName.of` -- which on this stack means +// fast-decode-name's string-keyed cache, which returns the PDFName +// (cache hit on the string side) or constructs it. Either way, the +// PDFName instance gets cached in the byte-cache for next time. +// Both caches converge on the same PDFName instance per logical name. 
+//
+// Composes with fast-decode-name (their caches see different keys for
+// the same logical name; both return the same PDFName via this fall-
+// back chain). Direct `PDFName.of(...)` calls from non-parser code
+// (setOutline, setMetadata) bypass the byte-cache and go straight
+// through fast-decode-name -- correct, since those calls don't have
+// a byte range to work with.
+//
+// Side-effecting import. Import once before PDFDocument.load runs;
+// idempotent.
+
+import { createRequire } from 'node:module';
+
+const require = createRequire(import.meta.url);
+const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default;
+const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default;
+const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default;
+const { IsWhitespace } = require('pdf-lib/cjs/core/syntax/Whitespace.js');
+const { IsDelimiter } = require('pdf-lib/cjs/core/syntax/Delimiters.js');
+
+const FORWARD_SLASH = CharCodes.ForwardSlash;
+
+// Upper bound on the name-body length handled by the fast path. The
+// miss path spreads the body into String.fromCharCode via apply(), i.e.
+// one call argument per byte; engines cap the argument count, so a very
+// long (typically malformed) name would throw a RangeError that the
+// upstream parser never throws. Anything longer is handed to the
+// original parseName instead. Well-formed names are far shorter.
+const MAX_FAST_NAME_BYTES = 4096;
+
+// hash -> Entry | Entry[]. Single-entry buckets store the Entry
+// directly; on collision we promote to an array. Entry shape is fixed
+// (bytes + name) so V8 gives it a stable hidden class.
+const byteCache = new Map();
+
+// One cached name: a private copy of the name-body bytes plus the
+// PDFName instance that PDFName.of returned for them.
+class Entry {
+  constructor(bytes, name) {
+    this.bytes = bytes;
+    this.name = name;
+  }
+}
+
+// True when `a` holds exactly the bytes buf[start, end).
+function _bytesEqual(a, buf, start, end) {
+  if (a.length !== end - start) return false;
+  for (let i = 0; i < a.length; i++) {
+    if (a[i] !== buf[start + i]) return false;
+  }
+  return true;
+}
+
+if (!PDFObjectParser.prototype.__fastParseNameInstalled) {
+  const orig = PDFObjectParser.prototype.parseName;
+
+  // Replacement parseName. Reads the stream's backing array directly,
+  // hashes the name body while scanning it, and returns the cached
+  // PDFName on a byte-for-byte match. Defers to the original parseName
+  // when the next byte is not `/` or the body exceeds
+  // MAX_FAST_NAME_BYTES; in both cases the stream position is untouched
+  // at the point of delegation.
+  PDFObjectParser.prototype.parseName = function fastParseName() {
+    const stream = this.bytes;
+    const buf = stream.bytes;
+    const len = stream.length;
+    let idx = stream.idx;
+
+    // assertNext(ForwardSlash). Fall back on the unexpected path.
+    if (idx >= len || buf[idx] !== FORWARD_SLASH) {
+      return orig.call(this);
+    }
+    idx++;
+
+    // Scan body + compute hash in one pass. Java-style hashCode
+    // (`hash * 31 + byte`) -- monomorphic Smi math, no allocations.
+    const start = idx;
+    let hash = 0;
+    while (idx < len) {
+      const byte = buf[idx];
+      if (IsWhitespace[byte] || IsDelimiter[byte]) break;
+      hash = (hash * 31 + byte) | 0;
+      idx++;
+    }
+
+    // Oversized body: let the original implementation re-parse it from
+    // the slash. stream.idx has not been advanced yet, so no rewind is
+    // needed.
+    if (idx - start > MAX_FAST_NAME_BYTES) {
+      return orig.call(this);
+    }
+
+    // NOTE(review): the position is advanced by writing stream.idx
+    // directly; if the stream's next() also maintains line/column
+    // bookkeeping, that is skipped here -- confirm error positions
+    // reported after a fast-path name are acceptable.
+    stream.idx = idx;
+
+    // Look up the byte-cache.
+    const bucket = byteCache.get(hash);
+    if (bucket !== undefined) {
+      if (bucket instanceof Entry) {
+        if (_bytesEqual(bucket.bytes, buf, start, idx)) return bucket.name;
+      } else {
+        // Collision: rare. Linear scan of the bucket.
+        for (let i = 0; i < bucket.length; i++) {
+          const e = bucket[i];
+          if (_bytesEqual(e.bytes, buf, start, idx)) return e.name;
+        }
+      }
+    }
+
+    // Miss. Build the lookup string in one shot (no cons-chain --
+    // String.fromCharCode handles bytes 0-255 directly) and route
+    // through the upstream PDFName.of (which on this stack is
+    // fast-decode-name's string-keyed cache). The resulting PDFName
+    // is the canonical instance; cache it in the byte-cache for next
+    // time so subsequent calls with the same bytes hit here. The
+    // length guard above bounds the argument count passed to apply().
+    const slice = buf.subarray(start, idx);
+    const name = PDFName.of(String.fromCharCode.apply(null, slice));
+    const key = new Uint8Array(slice); // copy for stable cache key
+    const entry = new Entry(key, name);
+    if (bucket === undefined) {
+      byteCache.set(hash, entry);
+    } else if (bucket instanceof Entry) {
+      byteCache.set(hash, [bucket, entry]);
+    } else {
+      bucket.push(entry);
+    }
+    return name;
+  };
+
+  PDFObjectParser.prototype.__fastParseNameInstalled = true;
+}
diff --git a/docs/lib/fast-parse-number.mjs b/docs/lib/fast-parse-number.mjs
new file mode 100644
index 0000000..0f202d0
--- /dev/null
+++ b/docs/lib/fast-parse-number.mjs
@@ -0,0 +1,151 @@
+// Replace pdf-lib's BaseParser.parseRawNumber and BaseParser.parseRawInt
+// with direct-integer accumulators that skip per-byte string
+// concatenation, charFromCode calls, and the trailing Number()
+// string-parse round-trip.
+//
+// The upstream implementations
+// ([BaseParser.js:17 + :33](node_modules/pdf-lib/cjs/core/parser/BaseParser.js:17))
+// build `value` one character at a time via `value += charFromCode(byte)`,
+// then call `Number(value)` to convert the string back to a number,
+// then perform `isFinite` (and for parseRawNumber, MAX_SAFE_INTEGER)
+// guards on every call. Every numeric token in a PDF flows through
+// these paths: parseRawNumber via PDFObjectParser.parseNumberOrRef
+// (once per number, twice per indirect ref), parseRawInt via
+// PDFParser.parseIndirectObjectHeader (twice per indirect object) and
+// PDFObjectStreamParser (twice per object inside an ObjStm). On the
+// book this fires hundreds of thousands of times and allocates a
+// throwaway string per call.
+//
+// The fast path accumulates the integer directly (n = n*10 + (byte -
+// 0x30)). parseRawNumber additionally descends into decimal handling
+// when a period appears.
Both fall back to the original for: +// - Numbers with > 15 integer digits (where direct accumulation +// could exceed Number.MAX_SAFE_INTEGER and lose precision). +// - Empty-digit cases (e.g., bare sign or lone "."), so upstream's +// NumberParsingError keeps its diagnostic context. +// Both fallback paths are vanishingly rare on real PDFs. +// +// Mechanism: BaseParser isn't re-exported by pdf-lib's index, so we +// import it via the package's CJS internal path through createRequire. +// Mutating BaseParser.prototype affects every subclass (PDFParser, +// PDFObjectParser, PDFObjectStreamParser, PDFXRefStreamParser). +// +// Side-effecting import. Import once before PDFDocument.load runs: +// +// import "./lib/fast-parse-number.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. + +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const BaseParser = require('pdf-lib/cjs/core/parser/BaseParser.js').default; +const { IsDigit } = require('pdf-lib/cjs/core/syntax/Numeric.js'); + +const ZERO = 0x30; // '0' +const PERIOD = 0x2E; // '.' +const PLUS = 0x2B; // '+' +const MINUS = 0x2D; // '-' + +// Number.MAX_SAFE_INTEGER == 9007199254740991 (16 digits). 15-digit +// integers are guaranteed to accumulate exactly without precision loss. 
+const MAX_SAFE_INT_DIGITS = 15;
+
+if (!BaseParser.__fastParseNumberInstalled) {
+  const origParseRawNumber = BaseParser.prototype.parseRawNumber;
+  const origParseRawInt = BaseParser.prototype.parseRawInt;
+
+  // Parse an unsigned run of decimal digits at the current position and
+  // return it as a number. Rewinds and delegates to the original
+  // parseRawInt when there are no digits at all, or when a digit beyond
+  // MAX_SAFE_INT_DIGITS is encountered.
+  BaseParser.prototype.parseRawInt = function fastParseRawInt() {
+    const bytes = this.bytes;
+    const start = bytes.offset();
+
+    let n = 0;
+    let digits = 0;
+    let byte = bytes.peek();
+    while (!bytes.done() && IsDigit[byte]) {
+      if (digits >= MAX_SAFE_INT_DIGITS) {
+        bytes.moveTo(start);
+        return origParseRawInt.call(this);
+      }
+      n = n * 10 + (byte - ZERO);
+      digits++;
+      bytes.next();
+      byte = bytes.peek();
+    }
+    if (digits === 0) {
+      bytes.moveTo(start);
+      return origParseRawInt.call(this);
+    }
+    return n;
+  };
+
+  // Parse an optionally signed decimal number (`[+-]digits[.digits]`)
+  // at the current position and return it as a number.
+  //
+  // Integer and fractional digits are accumulated into ONE integer
+  // mantissa, and the result is `mantissa / 10^fractionDigits`. With at
+  // most MAX_SAFE_INT_DIGITS digits in total, both the mantissa and the
+  // power of ten are below 10^15 < 2^53 and therefore exact, so the
+  // single division yields the correctly rounded double for the token.
+  // (Accumulating the fraction separately without a digit cap and then
+  // adding `frac / scale` to the integer part rounds twice and can lose
+  // low fraction digits before the division even happens.)
+  //
+  // Rewinds and delegates to the original parseRawNumber when:
+  //   - the token has more than MAX_SAFE_INT_DIGITS digits in total
+  //     (integer + fraction), or
+  //   - it has no digits at all (bare sign, lone ".").
+  BaseParser.prototype.parseRawNumber = function fastParseRawNumber() {
+    const bytes = this.bytes;
+    const start = bytes.offset();
+
+    // Sign
+    let byte = bytes.peek();
+    let neg = false;
+    if (byte === PLUS) {
+      bytes.next();
+      byte = bytes.peek();
+    } else if (byte === MINUS) {
+      neg = true;
+      bytes.next();
+      byte = bytes.peek();
+    }
+
+    // Integer part
+    let mantissa = 0;
+    let digits = 0;
+    while (!bytes.done() && IsDigit[byte]) {
+      if (digits >= MAX_SAFE_INT_DIGITS) {
+        // Precision risk -- rewind and delegate to the original
+        // implementation.
+        bytes.moveTo(start);
+        return origParseRawNumber.call(this);
+      }
+      mantissa = mantissa * 10 + (byte - ZERO);
+      digits++;
+      bytes.next();
+      byte = bytes.peek();
+    }
+
+    if (byte !== PERIOD) {
+      if (digits === 0) {
+        // Empty number (e.g., bare sign with no digits). Rewind and
+        // let the original implementation report the error.
+        bytes.moveTo(start);
+        return origParseRawNumber.call(this);
+      }
+      return neg ? -mantissa : mantissa;
+    }
+
+    // Consume period
+    bytes.next();
+    byte = bytes.peek();
+
+    // Decimal part: keep extending the same mantissa and remember the
+    // power of ten to divide by afterwards.
+    let scale = 1;
+    while (!bytes.done() && IsDigit[byte]) {
+      if (digits >= MAX_SAFE_INT_DIGITS) {
+        // Too many significant digits for exact accumulation.
+        bytes.moveTo(start);
+        return origParseRawNumber.call(this);
+      }
+      mantissa = mantissa * 10 + (byte - ZERO);
+      scale *= 10;
+      digits++;
+      bytes.next();
+      byte = bytes.peek();
+    }
+
+    if (digits === 0) {
+      // Lone "." with no digits on either side. Rewind to let the
+      // original implementation report the error.
+      bytes.moveTo(start);
+      return origParseRawNumber.call(this);
+    }
+
+    // scale === 1 means "digits." with nothing after the period.
+    const value = scale === 1 ? mantissa : mantissa / scale;
+    return neg ? -value : value;
+  };
+
+  BaseParser.__fastParseNumberInstalled = true;
+}
diff --git a/docs/lib/fast-parse-object.mjs b/docs/lib/fast-parse-object.mjs
new file mode 100644
index 0000000..e573dc4
--- /dev/null
+++ b/docs/lib/fast-parse-object.mjs
@@ -0,0 +1,92 @@
+// Dispatch PDFObjectParser.parseObject by first byte; gate the three
+// keyword scans behind a byte check.
+//
+// The upstream parseObject
+// ([PDFObjectParser.js:36](node_modules/pdf-lib/cjs/core/parser/PDFObjectParser.js:36))
+// runs three speculative matchKeyword calls (true / false / null)
+// before peeking the dispatch byte:
+//
+//   parseObject() {
+//     this.skipWhitespaceAndComments();
+//     if (this.matchKeyword(Keywords.true)) return PDFBool.True;
+//     if (this.matchKeyword(Keywords.false)) return PDFBool.False;
+//     if (this.matchKeyword(Keywords.null)) return PDFNull;
+//     var byte = this.bytes.peek();
+//     ...
+//   }
+//
+// parseObject is called for every dict value, array element, and
+// indirect-object body -- same call density as fastParseDict, which
+// is the #2 row in the process profile. true / false / null are
+// extraordinarily rare in real PDFs (boolean / null entries on
+// individual dict values, mostly), so the three matchKeyword calls
+// fail-and-rewind on essentially every invocation. Each failure
+// still pays bytes.offset() + bytes.next() + comparison +
+// bytes.moveTo(initialOffset).
+// +// This shim flips the dispatch: peek the first byte, branch by byte +// for the structural tokens, and only enter matchKeyword when the +// byte is `t` / `f` / `n` (i.e. could plausibly start the keyword). +// Dispatch order is by observed frequency in dict-value position: +// numbers / refs first (digits + sign + period), then dicts (<<), +// names (/), arrays ([), strings ((), hex strings (<). Same +// semantics -- a value starting with `t`/`f`/`n` that isn't a +// keyword still falls through to the same PDFObjectParsingError +// throw. +// +// Mechanism: PDFObjectParser isn't re-exported from pdf-lib's index, +// so we reach in through the CJS internals via createRequire (same +// shape as fast-parse-dict.mjs). Mutating +// PDFObjectParser.prototype.parseObject is global -- every parser +// instance created after this shim loads picks it up. +// +// Side-effecting import. Import once before PDFDocument.load runs: +// +// import "./lib/fast-parse-object.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. 
+
+import { createRequire } from 'node:module';
+
+const require = createRequire(import.meta.url);
+const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default;
+const PDFBool = require('pdf-lib/cjs/core/objects/PDFBool.js').default;
+const PDFNull = require('pdf-lib/cjs/core/objects/PDFNull.js').default;
+const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default;
+const { Keywords } = require('pdf-lib/cjs/core/syntax/Keywords.js');
+const { IsNumeric } = require('pdf-lib/cjs/core/syntax/Numeric.js');
+const { PDFObjectParsingError } = require('pdf-lib/cjs/core/errors.js');
+
+// Keyword byte sequences handed to matchKeyword.
+const { true: KwTrue, false: KwFalse, null: KwNull } = Keywords;
+
+// First-byte dispatch codes, captured once at module load.
+const {
+  LessThan,
+  ForwardSlash,
+  LeftSquareBracket,
+  LeftParen,
+  t: t_code,
+  f: f_code,
+  n: n_code,
+} = CharCodes;
+
+if (!PDFObjectParser.prototype.__fastParseObjectInstalled) {
+  // Replacement parseObject: skip whitespace/comments, peek one byte and
+  // branch on it. matchKeyword is only attempted when the byte is the
+  // first letter of true / false / null. Anything unrecognised -- including
+  // a failed keyword match -- throws PDFObjectParsingError.
+  PDFObjectParser.prototype.parseObject = function fastParseObject() {
+    this.skipWhitespaceAndComments();
+    const stream = this.bytes;
+    const first = stream.peek();
+
+    // Numbers and indirect references are checked first.
+    if (IsNumeric[first]) return this.parseNumberOrRef();
+
+    switch (first) {
+      case LessThan:
+        // `<<` opens a dict (or stream); a single `<` opens a hex string.
+        return stream.peekAhead(1) === LessThan
+          ? this.parseDictOrStream()
+          : this.parseHexString();
+      case ForwardSlash:
+        return this.parseName();
+      case LeftSquareBracket:
+        return this.parseArray();
+      case LeftParen:
+        return this.parseString();
+      case t_code:
+        if (this.matchKeyword(KwTrue)) return PDFBool.True;
+        break;
+      case f_code:
+        if (this.matchKeyword(KwFalse)) return PDFBool.False;
+        break;
+      case n_code:
+        if (this.matchKeyword(KwNull)) return PDFNull;
+        break;
+      default:
+        break;
+    }
+
+    throw new PDFObjectParsingError(stream.position(), first);
+  };
+
+  PDFObjectParser.prototype.__fastParseObjectInstalled = true;
+}
diff --git a/docs/lib/fast-pdfnumber-pool.mjs
b/docs/lib/fast-pdfnumber-pool.mjs new file mode 100644 index 0000000..b0ee999 --- /dev/null +++ b/docs/lib/fast-pdfnumber-pool.mjs @@ -0,0 +1,61 @@ +// Pool PDFNumber instances by value. +// +// After fast-refs / fast-indirect-objects / fast-dict-array shipped, +// the residual heap profile attributed ~15 MB of self-size to +// PDFObjectParser.parseNumberOrRef -- mostly inlined `new +// PDFNumber(value)` calls (each of which also allocates a fresh +// stringValue via `numberToString(value)`): +// +// function PDFNumber(value) { +// var _this = _super.call(this) || this; +// _this.numberValue = value; +// _this.stringValue = numberToString(value); // allocs +// return _this; +// } +// PDFNumber.of = function (value) { return new PDFNumber(value); }; +// +// No pool. Every PDFNumber.of(N) returns a fresh instance, even +// though PDFs are packed with repeated numeric values: page indices +// 0..1651, /Count totals, /N object-stream lengths, common +// /MediaBox dimensions (612, 792, 595, 842), font sizes, bit +// widths. The book parses hundreds of thousands of PDFNumber.of +// calls against a few thousand unique values. +// +// Shim. Dense array indexed by `value` for non-negative small +// integers (0..POOL_SIZE-1, currently 16384 -- covers all observed +// integer values in the book by a wide margin). Map fallback for +// floats, negatives, and out-of-range integers. Same shape as +// fast-refs on the PDFRef side. PDFNumber is immutable +// (numberValue and stringValue are set in the constructor and never +// mutated), so sharing instances is safe. +// +// Side-effecting import. Import once before any pdf-lib operation. +// Idempotent. 
+
+import { PDFNumber } from "pdf-lib";
+
+const POOL_SIZE = 16384;
+
+if (!PDFNumber.__fastPoolInstalled) {
+  const upstreamOf = PDFNumber.of;
+  // Slot i caches the PDFNumber for integer i; unused slots stay holes.
+  const smallIntPool = new Array(POOL_SIZE);
+  // Everything that fails the small-integer test: floats, negatives,
+  // integers >= POOL_SIZE, NaN.
+  const fallbackPool = new Map();
+
+  // Pooled PDFNumber.of: returns the same instance for the same value.
+  PDFNumber.of = function fastNumberOf(value) {
+    // Hot path: non-negative integer within pool range. Note that -0
+    // also satisfies this test and shares slot 0.
+    const isSmallInt = value >= 0 && value < POOL_SIZE && (value | 0) === value;
+
+    if (isSmallInt) {
+      const cached = smallIntPool[value];
+      if (cached !== undefined) return cached;
+      const created = upstreamOf.call(PDFNumber, value);
+      smallIntPool[value] = created;
+      return created;
+    }
+
+    // Cold path: Map cache. Map keys compare with SameValueZero, so NaN
+    // is a usable key.
+    const cached = fallbackPool.get(value);
+    if (cached !== undefined) return cached;
+    const created = upstreamOf.call(PDFNumber, value);
+    fallbackPool.set(value, created);
+    return created;
+  };
+  PDFNumber.__fastPoolInstalled = true;
+}
diff --git a/docs/lib/fast-refs-class.mjs b/docs/lib/fast-refs-class.mjs
new file mode 100644
index 0000000..c1c11e2
--- /dev/null
+++ b/docs/lib/fast-refs-class.mjs
@@ -0,0 +1,130 @@
+// fast-refs variant: use a class-style constructor for stable hidden class.
+//
+// fast-refs.mjs builds PDFRef instances with
+// `Object.create(PDFRef.prototype) + fresh.objectNumber = ... + fresh.gen = ...`.
+// V8 treats objects built that way as transitioning through intermediate
+// hidden-class maps as each property is added, and the result is roughly
+// twice as large per instance as a `new`-built object with the same
+// fields. Empirically on the book, PDFRef sits at ~60 B/instance via
+// fast-refs whereas PDFName (built via `new PDFName(...)`) sits at ~31 B.
+//
+// This shim swaps the `Object.create + writes` pattern for a constructor
+// that sets both fields in one shot, giving V8 a stable hidden class
+// from the first instance.
+//
+// Two-shape variant: most PDFRefs on fresh-Chrome workloads are gen=0
+// and don't need to carry generationNumber at all.
We allocate them via
+// _FastRef (single `objectNumber` inline slot) and let the prototype
+// supply a default `generationNumber = 0`. The rare gen!=0 path (PDF
+// spec allows it; our workload only hits it for the xref "free" entry
+// at object 0) uses _FastRefGen with both fields as own data properties.
+// V8 sees a bounded 2-shape polymorphism on PDFRef.prototype, and the
+// monomorphic hot path (gen=0 instances) keeps inline-field-read speed
+// for `.objectNumber` and `.generationNumber` reads -- no accessor-
+// property boundary to break inlining at upstream pdf-lib call sites
+// (PDFCrossRefSection.append, PDFCrossRefStream entry tuples,
+// PDFWriter.serializeToBuffer, our fast-indirect-objects shim, ...).
+//
+// Expected per-gen=0 instance: header (8 B) + 1 inline slot (4 B) = 12 B
+// raw, aligned to 16 B by V8 -- versus 12 + 2*4 = 20 B raw, aligned to
+// 24 B for a 2-slot instance. Saves 8 B per gen=0 PDFRef * ~226 k unique
+// = ~1.8 MB heap on the book.
+//
+// Mutually exclusive with --fast-refs in the harness.
+
+import { PDFRef } from 'pdf-lib';
+
+// ---- helpers (same as fast-refs.mjs, see commentary there) -------------
+
+// Decimal digit count of n. A short ladder answers 1..6 digits by
+// comparison alone; larger values are counted by repeated division.
+function _digitCount(n) {
+  let digits = 1;
+  for (let bound = 10; bound <= 1000000; bound *= 10) {
+    if (n < bound) return digits;
+    digits++;
+  }
+  let count = 0;
+  for (let rest = n; rest > 0; rest = (rest / 10) | 0) count++;
+  return count;
+}
+
+// Write n in decimal into buffer starting at offset, allocating nothing.
+// Returns the number of bytes written.
+function _writeUint(buffer, offset, n) {
+  if (n < 10) {
+    buffer[offset] = 0x30 + n;
+    return 1;
+  }
+  const width = _digitCount(n);
+  // Fill from the least significant digit backwards.
+  let rest = n;
+  for (let pos = offset + width - 1; pos >= offset; pos--) {
+    buffer[pos] = 0x30 + (rest % 10);
+    rest = (rest / 10) | 0;
+  }
+  return width;
+}
+
+// ---- the constructor-based fast PDFRef shapes --------------------------
+
+// gen=0 shape: only `objectNumber` is an own property; `generationNumber`
+// comes from the data-property default placed on PDFRef.prototype below.
+function _FastRef(objectNumber) {
+  this.objectNumber = objectNumber;
+}
+_FastRef.prototype = PDFRef.prototype;
+
+// gen!=0 shape: both fields are own data properties, shadowing the
+// prototype default.
+function _FastRefGen(objectNumber, generationNumber) {
+  this.objectNumber = objectNumber;
+  this.generationNumber = generationNumber;
+}
+_FastRefGen.prototype = PDFRef.prototype;
+
+if (!PDFRef.__fastRefsClassInstalled) {
+  const genZeroPool = [];         // gen=0 cache, indexed by objectNumber
+  const otherGenPool = new Map(); // gen!=0 cache, keyed by "N M" string
+
+  // Default generationNumber for _FastRef instances, which carry no own
+  // copy; _FastRefGen instances shadow it.
+  PDFRef.prototype.generationNumber = 0;
+
+  // Pooled PDFRef.of: one instance per (objectNumber, generationNumber).
+  PDFRef.of = function fastClassOf(objectNumber, generationNumber) {
+    const isGenZero = generationNumber === undefined || generationNumber === 0;
+
+    if (!isGenZero) {
+      // gen != 0: this path is dead on fresh-Chrome workloads except for
+      // the xref "free" entry at object 0. Kept for spec correctness.
+      const key = objectNumber + ' ' + generationNumber;
+      const cached = otherGenPool.get(key);
+      if (cached) return cached;
+      const created = new _FastRefGen(objectNumber, generationNumber);
+      otherGenPool.set(key, created);
+      return created;
+    }
+
+    const cached = genZeroPool[objectNumber];
+    if (cached) return cached;
+    const created = new _FastRef(objectNumber);
+    genZeroPool[objectNumber] = created;
+    return created;
+  };
+
+  // The prototype methods below derive everything from `objectNumber` /
+  // `generationNumber`; no instance carries the upstream `tag` string.
+
+  PDFRef.prototype.toString = function () {
+    return this.objectNumber + ' ' + this.generationNumber + ' R';
+  };
+
+  PDFRef.prototype.sizeInBytes = function () {
+    // Two digit runs plus " ", " " and "R".
+    const digitBytes = _digitCount(this.objectNumber) + _digitCount(this.generationNumber);
+    return digitBytes + 3;
+  };
+
+  PDFRef.prototype.copyBytesInto = function (buffer, offset) {
+    let cursor = offset;
+    cursor += _writeUint(buffer, cursor, this.objectNumber);
+    buffer[cursor++] = 0x20; // ' '
+    cursor += _writeUint(buffer, cursor, this.generationNumber);
+    buffer[cursor++] = 0x20; // ' '
+    buffer[cursor++] = 0x52; // 'R'
+    return cursor - offset;
+  };
+
+  PDFRef.__fastRefsClassInstalled = true;
+}
diff --git a/docs/lib/fast-refs.mjs b/docs/lib/fast-refs.mjs
new file mode 100644
index 0000000..beeb76a
--- /dev/null
+++ b/docs/lib/fast-refs.mjs
@@ -0,0 +1,140 @@
+// Replace pdf-lib's PDFRef.of pool lookup with a dense-array cache
+// for the generation=0 case (the overwhelmingly common one), AND
+// drop the per-instance `tag` string entirely.
+//
+// The upstream implementation
+// (node_modules/pdf-lib/cjs/core/objects/PDFRef.js) keys its pool by
+// a freshly-built string `<objectNumber> <generationNumber> R` on
+// every call:
+//
+//   var tag = objectNumber + " " + generationNumber + " R";
+//   var instance = pool.get(tag);
+//
+// On the book we see ~1.2 M PDFRef.of calls per load, 82 % of them
+// with gen=0; each call allocates the tag string before Map.get can
+// hash it. That's ~330 ms of self-time on the process-phase profile
+// plus measurable GC pressure.
+//
+// Shim part 1: dense array indexed by objectNumber for the gen=0 branch.
+// Plain array indexing, no string alloc, no Map hash. On a gen=0 cache
+// miss we construct the PDFRef directly via
+// `Object.create(PDFRef.prototype)` plus manual field init, skipping
+// both the ENFORCER check and the upstream `pool.set(tag, instance)`.
+//
+// Shim part 2: drop the per-instance `tag` field. Upstream caches
+// `<objectNumber> <generationNumber> R` on each PDFRef so toString /
+// sizeInBytes /
+// copyBytesInto can read it back.
After fast-array-onebuf shipped, +// the heap profile showed PDFParser.parseIndirectObjectHeader sitting +// at 13.7 MB (25 % of total). The attribution chain (via +// perf/find-heap-callers.mjs): +// +// parseIndirectObjectHeader → skipJibberish (14.2 MB) +// → matchIndirectObjectHeader (try/catch wrapper) +// → parseIndirectObjectHeader → fastOf +// +// skipJibberish runs after every successful indirect object parse and +// speculatively calls matchIndirectObjectHeader to detect the next +// `N M obj` header. On valid PDFs the speculation always succeeds, so +// fastOf fires once per indirect-object boundary, populating the +// dense-array cache. The subsequent "real" parseIndirectObject then +// hits the cache. V8 inlines fastOf at this call site (small + hot +// from speculation) so the attribution lands on the caller -- 13.7 MB +// of which was the tag-string allocation (`objectNumber + ' 0 R'`): +// V8 builds 1-2 intermediate concat strings + the final ~25-35 B +// tag, ~150 k times. +// +// Eliminating the `tag` field collapses all of that. The prototype +// methods now compute their results from objectNumber / generationNumber +// directly. copyBytesInto writes digits straight into the output buffer +// with a no-allocation _writeUint helper; sizeInBytes returns +// digitCount(obj) + digitCount(gen) + 3 (for " " + " R"); toString +// builds on demand (only used for debug, no caching needed). +// +// gen != 0 PDFRefs constructed via the upstream path still have `tag` +// set by the upstream constructor -- our overrides ignore the field, +// so the tag string is allocated-then-wasted. gen != 0 is ~18 % of refs +// at ~50 K instances; the waste is bounded and not worth patching the +// constructor for. 
+//
+// gen != 0 cache lookups (pdf-lib's xref-stream bookkeeping where
+// "generation" encodes an in-ObjStm index per PDF 1.5 spec, see
+// PDFXRefStreamParser.js:74-80) still pass through the original
+// PDFRef.of -- their Map pool is harmless at gen!=0's volume.
+//
+// Side-effecting import. Import once before any pdf-lib operation.
+// Idempotent.
+
+import { PDFRef } from "pdf-lib";
+
+// Non-allocating decimal digit count for non-negative integers. A short
+// ladder answers 1..6 digits by comparison alone; larger values are
+// counted by repeated division.
+function _digitCount(n) {
+  let digits = 1;
+  for (let bound = 10; bound <= 1000000; bound *= 10) {
+    if (n < bound) return digits;
+    digits++;
+  }
+  let count = 0;
+  for (let rest = n; rest > 0; rest = (rest / 10) | 0) count++;
+  return count;
+}
+
+// Write n's decimal representation into buffer starting at offset.
+// No allocations. Returns the number of bytes written. n must be a
+// non-negative integer.
+function _writeUint(buffer, offset, n) {
+  if (n < 10) {
+    buffer[offset] = 0x30 + n;
+    return 1;
+  }
+  const width = _digitCount(n);
+  // Fill from the least significant digit backwards.
+  let rest = n;
+  for (let pos = offset + width - 1; pos >= offset; pos--) {
+    buffer[pos] = 0x30 + (rest % 10);
+    rest = (rest / 10) | 0;
+  }
+  return width;
+}
+
+if (!PDFRef.__fastPoolInstalled) {
+  const upstreamOf = PDFRef.of;
+  const genZeroPool = []; // gen=0 cache, indexed by objectNumber
+
+  // Pooled PDFRef.of. gen=0 (or omitted) is served from genZeroPool;
+  // any other generation is forwarded to the original PDFRef.of.
+  PDFRef.of = function fastOf(objectNumber, generationNumber) {
+    const isGenZero = generationNumber === undefined || generationNumber === 0;
+    if (!isGenZero) {
+      return upstreamOf.call(PDFRef, objectNumber, generationNumber);
+    }
+
+    const cached = genZeroPool[objectNumber];
+    if (cached) return cached;
+
+    // Direct construction -- skip ENFORCER check, skip upstream pool.set,
+    // skip the per-instance `tag` string (the prototype methods now
+    // compute their results from objectNumber / generationNumber).
+    const created = Object.create(PDFRef.prototype);
+    created.objectNumber = objectNumber;
+    created.generationNumber = 0;
+    genZeroPool[objectNumber] = created;
+    return created;
+  };
+
+  // The prototype methods below never read `tag`. That covers gen=0
+  // instances (which have no tag) and gen!=0 instances (whose tag is
+  // set by upstream's constructor but ignored here).
+
+  PDFRef.prototype.toString = function () {
+    return this.objectNumber + ' ' + this.generationNumber + ' R';
+  };
+
+  PDFRef.prototype.sizeInBytes = function () {
+    // Two digit runs plus " ", " " and "R".
+    const digitBytes = _digitCount(this.objectNumber) + _digitCount(this.generationNumber);
+    return digitBytes + 3;
+  };
+
+  PDFRef.prototype.copyBytesInto = function (buffer, offset) {
+    let cursor = offset;
+    cursor += _writeUint(buffer, cursor, this.objectNumber);
+    buffer[cursor++] = 0x20; // ' '
+    cursor += _writeUint(buffer, cursor, this.generationNumber);
+    buffer[cursor++] = 0x20; // ' '
+    buffer[cursor++] = 0x52; // 'R'
+    return cursor - offset;
+  };
+
+  PDFRef.__fastPoolInstalled = true;
+}
diff --git a/docs/lib/fast-size-in-bytes.mjs b/docs/lib/fast-size-in-bytes.mjs
new file mode 100644
index 0000000..779ade4
--- /dev/null
+++ b/docs/lib/fast-size-in-bytes.mjs
@@ -0,0 +1,62 @@
+// Replace pdf-lib's utils.sizeInBytes -- which allocates a base-2 string
+// just to count its bit length -- with a non-allocating short-circuit
+// ladder.
+//
+// The upstream sizeInBytes
+// ([numbers.js:37](node_modules/pdf-lib/cjs/utils/numbers.js:37)) is:
+//
+//   exports.sizeInBytes = function (n) {
+//     return Math.ceil(n.toString(2).length / 8);
+//   };
+//
+// It's called from PDFCrossRefStream.computeMaxEntryByteWidths (three
+// calls per xref entry, ~50 k entries on the book) and from
+// utils.bytesFor (to size the Uint8Array before filling it byte-by-
+// byte, called from PDFCrossRefStream.getUnencodedContents). Both
+// paths are part of writing the cross-reference stream.
+// +// For the xref values the distribution is heavily skewed small: type +// is always 0/1/2 (1 byte), generationNumber is always 0 (1 byte), +// object-stream indices are small (1-2 bytes), and file offsets are +// 3-4 bytes for any sub-4GB PDF. A short-circuit ladder catches the +// dominant cases in one compare; the rare 5+ byte tail falls through +// to a Math.clz32-based fallback that's still allocation-free. +// +// Why patch three places (and why bytesFor isn't on the list): +// pdf-lib ships compiled against tslib 1.x, whose `__exportStar` +// does a value-copy (`exports[p] = m[p]`) rather than installing a +// live getter. So consumers that read sizeInBytes through a barrel +// (`utils_1.sizeInBytes(...)` from PDFCrossRefStream) hold a +// captured reference and won't see a mutation of `numbers.sizeInBytes` +// alone. Patch all three barrel layers (utils/numbers, utils/index, +// top-level index) to cover every observed call site. utils.bytesFor +// reads `exports.sizeInBytes` at call time from the same module +// object we mutate first, so it picks up the fast path without a +// separate patch. +// +// Side-effecting import. Import once before any pdf-lib operation: +// +// import "./lib/fast-size-in-bytes.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. 
import { createRequire } from 'node:module';

const require = createRequire(import.meta.url);
const numbers = require('pdf-lib/cjs/utils/numbers.js');
const utilsBarrel = require('pdf-lib/cjs/utils/index.js');
const topBarrel = require('pdf-lib/cjs/index.js');

// 2^32: the boundary between the four-compare ladder and the clz32 tail.
const TWO_POW_32 = 0x100000000;

/**
 * Allocation-free replacement for pdf-lib's utils.sizeInBytes.
 *
 * @param {number} n - value whose byte width is wanted
 * @returns {number} 1-4 straight from the compare ladder; for larger
 *   values, 4 plus the byte width of the bits above bit 31.
 */
function fastSizeInBytes(n) {
  if (n < 0x100) return 1;
  if (n < 0x10000) return 2;
  if (n < 0x1000000) return 3;
  if (n < TWO_POW_32) return 4;
  // Rare tail: measure only the high word with clz32, still no string
  // allocation.
  const highWord = Math.floor(n / TWO_POW_32);
  const highBits = 32 - Math.clz32(highWord);
  return 4 + Math.ceil(highBits / 8);
}

// The flag on `numbers` makes repeated imports a no-op. All three module
// objects are assigned because each one holds its own `sizeInBytes`
// property.
if (!numbers.__fastSizeInBytesInstalled) {
  for (const barrel of [numbers, utilsBarrel, topBarrel]) {
    barrel.sizeInBytes = fastSizeInBytes;
  }
  numbers.__fastSizeInBytesInstalled = true;
}
+// +// Nine methods participate in this pattern; this shim replaces all +// of them with synchronous (or, where a legitimate await remains, +// awaiterless `async`) twins: +// +// Load side (parser): +// PDFParser.prototype.parseDocument +// PDFParser.prototype.parseDocumentSection +// PDFParser.prototype.parseIndirectObjects +// PDFParser.prototype.parseIndirectObject +// PDFObjectStreamParser.prototype.parseIntoContext +// PDFDocument.load (static; only awaited parseDocument) +// +// Save side (writers): +// PDFWriter.prototype.serializeToBuffer +// (kept `async` because the inherited path awaits the +// ParallelStreamWriter override of computeBufferSize, which +// does genuine Promise.all-driven libuv-pool concurrency) +// PDFWriter.prototype.computeBufferSize +// PDFStreamWriter.prototype.computeBufferSize +// +// The load-side patches have to land together: each method awaits +// the next one down, so desugaring any one in isolation still leaves +// a Promise chain dangling. +// +// PDFDocument.load's signature is preserved (still callable as +// `await PDFDocument.load(bytes)`; awaiting a non-Promise resolves +// to the value), so existing `await`-based call sites need no change. +// Callers that chain `.then()` / `.catch()` directly on the return +// value do need updating, because load no longer returns a Promise. The +// parseSpeed option is silently ignored. parallel-deflate.mjs's +// parallelSave drops `objectsPerTick` from its public API in step +// with this shim. +// +// Side-effecting import. Import once before any pdf-lib operation: +// +// import "./lib/fast-sync-load.mjs"; +// +// Idempotent -- repeated imports do nothing after the first. 
import { createRequire } from 'node:module';

const require = createRequire(import.meta.url);
const PDFParser = require('pdf-lib/cjs/core/parser/PDFParser.js').default;
const PDFObjectStreamParser = require('pdf-lib/cjs/core/parser/PDFObjectStreamParser.js').default;
const PDFXRefStreamParser = require('pdf-lib/cjs/core/parser/PDFXRefStreamParser.js').default;
const PDFRawStream = require('pdf-lib/cjs/core/objects/PDFRawStream.js').default;
const PDFRef = require('pdf-lib/cjs/core/objects/PDFRef.js').default;
const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default;
const PDFNumber = require('pdf-lib/cjs/core/objects/PDFNumber.js').default;
const PDFStream = require('pdf-lib/cjs/core/objects/PDFStream.js').default;
const PDFInvalidObject = require('pdf-lib/cjs/core/objects/PDFInvalidObject.js').default;
const PDFDocument = require('pdf-lib/cjs/api/PDFDocument.js').default;
const PDFWriter = require('pdf-lib/cjs/core/writers/PDFWriter.js').default;
const PDFStreamWriter = require('pdf-lib/cjs/core/writers/PDFStreamWriter.js').default;
const PDFHeader = require('pdf-lib/cjs/core/document/PDFHeader.js').default;
const PDFTrailer = require('pdf-lib/cjs/core/document/PDFTrailer.js').default;
const PDFTrailerDict = require('pdf-lib/cjs/core/document/PDFTrailerDict.js').default;
const PDFCrossRefSection = require('pdf-lib/cjs/core/document/PDFCrossRefSection.js').default;
const PDFCrossRefStream = require('pdf-lib/cjs/core/structures/PDFCrossRefStream.js').default;
const PDFObjectStream = require('pdf-lib/cjs/core/structures/PDFObjectStream.js').default;
const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default;
const { ReparseError, StalledParserError } = require('pdf-lib/cjs/core/errors.js');
const { IsDigit } = require('pdf-lib/cjs/core/syntax/Numeric.js');
const { Keywords } = require('pdf-lib/cjs/core/syntax/Keywords.js');
const { toUint8Array, copyStringIntoBuffer, last } =
  require('pdf-lib/cjs/utils/index.js');

// Pool-deduped PDFName instances are reference-stable for the whole
// load (see fast-parse-dict.mjs for the same trick). Capture the three
// sentinels parseIndirectObject's Type-dispatch needs.
const TypeName = PDFName.of('Type');
const ObjStmName = PDFName.of('ObjStm');
const XRefName = PDFName.of('XRef');
// Also captured once: the `0 0 R` ref that parseDocument removes, and
// the /Size key that the stream writer's sizing pass sets on the xref
// stream dict.
const RefZero = PDFRef.of(0);
const SizeName = PDFName.of('Size');

// Install guard: the flag lives on PDFParser.prototype, so a repeated
// import of this module skips every assignment below.
if (!PDFParser.prototype.__fastSyncLoadInstalled) {

  // ----- Load side ---------------------------------------------------

  // Synchronous top-level parse. Throws ReparseError on a second call
  // and StalledParserError when a full section pass leaves the byte
  // offset unchanged. Returns this.context.
  PDFParser.prototype.parseDocument = function parseDocumentSync() {
    if (this.alreadyParsed) {
      throw new ReparseError('PDFParser', 'parseDocument');
    }
    this.alreadyParsed = true;
    this.context.header = this.parseHeader();

    let prevOffset;
    while (!this.bytes.done()) {
      this.parseDocumentSection();
      const offset = this.bytes.offset();
      // No forward progress across a whole section means the parser is
      // wedged; bail out instead of spinning.
      if (offset === prevOffset) {
        throw new StalledParserError(this.bytes.position());
      }
      prevOffset = offset;
    }

    this.maybeRecoverRoot();
    if (this.context.lookup(RefZero)) {
      console.warn('Removing parsed object: 0 0 R');
      this.context.delete(RefZero);
    }
    return this.context;
  };

  // One section: a run of indirect objects, then the optional xref
  // section, trailer dict and trailer, then whatever skipJibberish
  // discards.
  PDFParser.prototype.parseDocumentSection = function parseDocumentSectionSync() {
    this.parseIndirectObjects();
    this.maybeParseCrossRefSection();
    this.maybeParseTrailerDict();
    this.maybeParseTrailer();
    this.skipJibberish();
  };

  // Parse consecutive indirect objects for as long as the cursor sits
  // on a digit. A failed parse rewinds to the object's first byte and
  // retries through tryToParseInvalidIndirectObject.
  PDFParser.prototype.parseIndirectObjects = function parseIndirectObjectsSync() {
    this.skipWhitespaceAndComments();
    while (!this.bytes.done() && IsDigit[this.bytes.peek()]) {
      const initialOffset = this.bytes.offset();
      try {
        this.parseIndirectObject();
      } catch (e) {
        this.bytes.moveTo(initialOffset);
        this.tryToParseInvalidIndirectObject();
      }
      this.skipWhitespaceAndComments();
      // Fast path: on valid PDFs the next byte is almost always a digit
      // (start of the next `N M obj` header). skipJibberish only exists
      // to recover from invalid PDFs that wedge garbage between indirect
      // objects, but its hot path -- 150 k calls per load on the book --
      // speculatively runs matchKeyword(xref/trailer/startxref) (all fail
      // on a digit) and then matchIndirectObjectHeader (a try/catch
      // around parseIndirectObjectHeader + parseRawInt x2 + matchKeyword
      // + fastOf round-trip). All to confirm what the outer while's
      // IsDigit check already knew. Short-circuit when the cursor is on
      // a digit; fall through to skipJibberish on anything else
      // (xref / trailer / startxref keyword starts, or real jibberish).
      // The once-per-section skipJibberish in parseDocumentSection
      // (after maybeParseTrailer) is unaffected.
      // NOTE(review): garbage that itself begins with a digit now goes
      // to parseIndirectObject / tryToParseInvalidIndirectObject rather
      // than skipJibberish -- confirm recovery on such malformed files
      // still matches what callers expect.
      if (!this.bytes.done() && IsDigit[this.bytes.peek()]) continue;
      this.skipJibberish();
    }
  };

  // Parse one `N M obj ... endobj`. Raw streams whose /Type is ObjStm
  // or XRef are expanded into the context through their stream parsers
  // and are not themselves assigned; every other object is assigned
  // under its ref. Returns the parsed ref.
  PDFParser.prototype.parseIndirectObject = function parseIndirectObjectSync() {
    const ref = this.parseIndirectObjectHeader();
    this.skipWhitespaceAndComments();
    const object = this.parseObject();
    this.skipWhitespaceAndComments();
    this.matchKeyword(Keywords.endobj);
    if (object instanceof PDFRawStream &&
        object.dict.lookup(TypeName) === ObjStmName) {
      PDFObjectStreamParser.forStream(object).parseIntoContext();
    } else if (object instanceof PDFRawStream &&
               object.dict.lookup(TypeName) === XRefName) {
      PDFXRefStreamParser.forStream(object).parseIntoContext();
    } else {
      this.context.assign(ref, object);
    }
    return ref;
  };

  // Unpack an object stream: for each (objectNumber, offset) entry,
  // seek to firstOffset + offset, parse one object and assign it under
  // generation 0. Throws ReparseError on a second call.
  PDFObjectStreamParser.prototype.parseIntoContext = function parseIntoContextSync() {
    if (this.alreadyParsed) {
      throw new ReparseError('PDFObjectStreamParser', 'parseIntoContext');
    }
    this.alreadyParsed = true;
    const offsetsAndObjectNumbers = this.parseOffsetsAndObjectNumbers();
    for (let i = 0, len = offsetsAndObjectNumbers.length; i < len; i++) {
      const entry = offsetsAndObjectNumbers[i];
      this.bytes.moveTo(this.firstOffset + entry.offset);
      const object = this.parseObject();
      const ref = PDFRef.of(entry.objectNumber, 0);
      this.context.assign(ref, object);
    }
  };

  // PDFDocument.load only awaited parseDocument(); now that's sync, the
  // outer __awaiter is wasted too. Drop it. Signature unchanged --
  // `await PDFDocument.load(...)` on a non-Promise resolves to the value.
  // The parseSpeed option is silently ignored (no more yield gate to tune).
  // Note the return value is a PDFDocument, not a Promise, and parse
  // errors are thrown synchronously from this call.
  PDFDocument.load = function loadSync(pdf, options) {
    if (options === undefined) options = {};
    // Each option falls back to its default only when strictly
    // undefined; any other value (including null) is passed through.
    const ignoreEncryption = options.ignoreEncryption === undefined ? false : options.ignoreEncryption;
    const throwOnInvalidObject = options.throwOnInvalidObject === undefined ? false : options.throwOnInvalidObject;
    const updateMetadata = options.updateMetadata === undefined ? true : options.updateMetadata;
    const capNumbers = options.capNumbers === undefined ? false : options.capNumbers;
    const bytes = toUint8Array(pdf);
    // Infinity is passed in the slot where options.parseSpeed would
    // otherwise go.
    const context = PDFParser.forBytesWithOptions(
      bytes, Infinity, throwOnInvalidObject, capNumbers,
    ).parseDocument();
    return new PDFDocument(context, ignoreEncryption, updateMetadata);
  };

  // ----- Save side ---------------------------------------------------

  // PDFWriter.serializeToBuffer awaits computeBufferSize, which in our
  // pipeline is the ParallelStreamWriter override -- genuinely async
  // because of `await Promise.all(deflated)` over libuv's thread pool.
  // So the wrapper stays async. The conditional waitForTick yield in
  // its main loop is the only piece we strip.
  //
  // Resolves to a Uint8Array of exactly the size computeBufferSize
  // reported: header, each indirect object wrapped in
  // `N G obj\n ... \nendobj\n\n`, then the xref and trailer dict when
  // present, then the trailer.
  PDFWriter.prototype.serializeToBuffer = async function serializeToBufferSync() {
    const { size, header, indirectObjects, xref, trailerDict, trailer } =
      await this.computeBufferSize();
    const buffer = new Uint8Array(size);
    let offset = 0;
    offset += header.copyBytesInto(buffer, offset);
    buffer[offset++] = CharCodes.Newline;
    buffer[offset++] = CharCodes.Newline;
    for (let idx = 0, len = indirectObjects.length; idx < len; idx++) {
      // Each entry is a [ref, object] pair.
      const indirectObject = indirectObjects[idx];
      const ref = indirectObject[0];
      const object = indirectObject[1];
      offset += copyStringIntoBuffer(String(ref.objectNumber), buffer, offset);
      buffer[offset++] = CharCodes.Space;
      offset += copyStringIntoBuffer(String(ref.generationNumber), buffer, offset);
      buffer[offset++] = CharCodes.Space;
      buffer[offset++] = CharCodes.o;
      buffer[offset++] = CharCodes.b;
      buffer[offset++] = CharCodes.j;
      buffer[offset++] = CharCodes.Newline;
      offset += object.copyBytesInto(buffer, offset);
      buffer[offset++] = CharCodes.Newline;
      buffer[offset++] = CharCodes.e;
      buffer[offset++] = CharCodes.n;
      buffer[offset++] = CharCodes.d;
      buffer[offset++] = CharCodes.o;
      buffer[offset++] = CharCodes.b;
      buffer[offset++] = CharCodes.j;
      buffer[offset++] = CharCodes.Newline;
      buffer[offset++] = CharCodes.Newline;
    }
    // xref / trailerDict are absent from the stream writer's result
    // (its computeBufferSize below returns neither), hence the guards.
    if (xref) {
      offset += xref.copyBytesInto(buffer, offset);
      buffer[offset++] = CharCodes.Newline;
    }
    if (trailerDict) {
      offset += trailerDict.copyBytesInto(buffer, offset);
      buffer[offset++] = CharCodes.Newline;
      buffer[offset++] = CharCodes.Newline;
    }
    offset += trailer.copyBytesInto(buffer, offset);
    return buffer;
  };

  // PDFWriter.computeBufferSize -- the basic (non-stream) writer's
  // sizing pass. Not on our pipeline's hot path (we route through
  // PDFStreamWriter via ParallelStreamWriter, both of which override
  // this method) but patched for consistency: the only async thing
  // upstream is the conditional waitForTick yield in its loop.
  //
  // Returns { size, header, indirectObjects, xref, trailerDict,
  // trailer } synchronously. The literal +2 / +1 terms pair with the
  // separator newlines serializeToBuffer writes after the header, the
  // xref and the trailer dict.
  PDFWriter.prototype.computeBufferSize = function computeBufferSizeBaseSync() {
    const header = PDFHeader.forVersion(1, 7);
    let size = header.sizeInBytes() + 2;
    const xref = PDFCrossRefSection.create();
    const indirectObjects = this.context.enumerateIndirectObjects();
    for (let idx = 0, len = indirectObjects.length; idx < len; idx++) {
      const indirectObject = indirectObjects[idx];
      const ref = indirectObject[0];
      // Record the object's byte offset before adding its own size.
      xref.addEntry(ref, size);
      size += this.computeIndirectObjectSize(indirectObject);
    }
    const xrefOffset = size;
    size += xref.sizeInBytes() + 1;
    const trailerDict = PDFTrailerDict.of(this.createTrailerDict());
    size += trailerDict.sizeInBytes() + 2;
    const trailer = PDFTrailer.forLastCrossRefSectionOffset(xrefOffset);
    size += trailer.sizeInBytes();
    return { size, header, indirectObjects, xref, trailerDict, trailer };
  };

  // PDFStreamWriter.computeBufferSize -- the upstream stream writer's
  // sizing pass with two waitForTick gates (one per loop). Not on our
  // pipeline's hot path (ParallelStreamWriter overrides this with its
  // own three-phase parallel-deflate version) but patched for
  // consistency. Logic mirrors the upstream method body exactly.
  //
  // Returns { size, header, indirectObjects, trailer }, where
  // indirectObjects holds the uncompressed objects, then one object
  // stream per chunk of compressed objects, then the xref stream.
  PDFStreamWriter.prototype.computeBufferSize = function computeBufferSizeStreamSync() {
    // Fresh object numbers for the object streams and the xref stream
    // start just past the largest number already in the context.
    let objectNumber = this.context.largestObjectNumber + 1;
    const header = PDFHeader.forVersion(1, 7);
    let size = header.sizeInBytes() + 2;
    const xrefStream = PDFCrossRefStream.create(this.createTrailerDict(), this.encodeStreams);

    const uncompressedObjects = [];
    const compressedObjects = [];
    const objectStreamRefs = [];

    // Pass 1: classify. Streams, invalid objects, the Encrypt entry and
    // anything with a non-zero generation are written directly; the
    // rest are grouped into chunks of objectsPerStream.
    const indirectObjects = this.context.enumerateIndirectObjects();
    for (let idx = 0, len = indirectObjects.length; idx < len; idx++) {
      const indirectObject = indirectObjects[idx];
      const ref = indirectObject[0];
      const object = indirectObject[1];
      const shouldNotCompress =
        ref === this.context.trailerInfo.Encrypt ||
        object instanceof PDFStream ||
        object instanceof PDFInvalidObject ||
        ref.generationNumber !== 0;
      if (shouldNotCompress) {
        uncompressedObjects.push(indirectObject);
        xrefStream.addUncompressedEntry(ref, size);
        size += this.computeIndirectObjectSize(indirectObject);
      } else {
        let chunk = last(compressedObjects);
        let objectStreamRef = last(objectStreamRefs);
        // Open a new chunk when there is none yet or the current one
        // has just reached a multiple of objectsPerStream.
        if (!chunk || chunk.length % this.objectsPerStream === 0) {
          chunk = [];
          compressedObjects.push(chunk);
          objectStreamRef = PDFRef.of(objectNumber++);
          objectStreamRefs.push(objectStreamRef);
        }
        // chunk.length is read before the push, so it is this
        // object's index inside its object stream.
        xrefStream.addCompressedEntry(ref, objectStreamRef, chunk.length);
        chunk.push(indirectObject);
      }
    }

    // Pass 2: materialise one PDFObjectStream per chunk and account for
    // it as an ordinary uncompressed object.
    for (let idx = 0, len = compressedObjects.length; idx < len; idx++) {
      const chunk = compressedObjects[idx];
      const ref = objectStreamRefs[idx];
      const objectStream = PDFObjectStream.withContextAndObjects(this.context, chunk, this.encodeStreams);
      xrefStream.addUncompressedEntry(ref, size);
      size += this.computeIndirectObjectSize([ref, objectStream]);
      uncompressedObjects.push([ref, objectStream]);
    }

    // The xref stream takes the last object number and describes
    // itself; /Size is set to one past that number before the stream
    // is sized.
    const xrefStreamRef = PDFRef.of(objectNumber++);
    xrefStream.dict.set(SizeName, PDFNumber.of(objectNumber));
    xrefStream.addUncompressedEntry(xrefStreamRef, size);
    const xrefOffset = size;
    size += this.computeIndirectObjectSize([xrefStreamRef, xrefStream]);
    uncompressedObjects.push([xrefStreamRef, xrefStream]);

    const trailer = PDFTrailer.forLastCrossRefSectionOffset(xrefOffset);
    size += trailer.sizeInBytes();
    return { size, header, indirectObjects: uncompressedObjects, trailer };
  };

  PDFParser.prototype.__fastSyncLoadInstalled = true;
}
// PDF parse corner: a real may omit its integer part. `.251` is valid
// (Chrome emits it for /CA, /ca alpha values). The parser accepts
// `[sign?][digits?][.[digits?]]?` with the constraint that at least one
// digit appears.

import { inflateSync } from 'node:zlib';

// ---- Byte constants -------------------------------------------------

const TAB = 9, LF = 10, FF = 12, CR = 13, SP = 32;
const LT = 60 /* < */, GT = 62 /* > */;
const LB = 91 /* [ */, RB = 93 /* ] */;
const LP = 40 /* ( */, RP = 41 /* ) */;
const SLASH = 47, PERCENT = 37, BACKSLASH = 92;
const D0 = 48, D9 = 57;
const MINUS = 45, PLUS = 43, DOT = 46;
const a_ = 97, b_ = 98, d_ = 100, e_ = 101, f_ = 102, j_ = 106;
const l_ = 108, m_ = 109, n_ = 110, o_ = 111, r_ = 114, s_ = 115;
const t_ = 116, u_ = 117, x_ = 120;
const R_CH = 82, L_CH = 76, T_CH = 84, N_CH = 78, F_CH = 70;

// Size of the per-dict capture stacks; also the deepest dict nesting
// parseDict accepts before throwing.
const MAX_DEPTH = 64;

// ---- Lookup tables (mirror pdf-lib's IsWhitespace / IsDelimiter / IsDigit / IsNumeric) ----

const IsWS = new Uint8Array(256);
IsWS[0] = IsWS[TAB] = IsWS[LF] = IsWS[FF] = IsWS[CR] = IsWS[SP] = 1;

const IsDelim = new Uint8Array(256);
IsDelim[LT] = IsDelim[GT] = IsDelim[LB] = IsDelim[RB] = 1;
IsDelim[LP] = IsDelim[RP] = IsDelim[SLASH] = IsDelim[PERCENT] = 1;

const IsDigit = new Uint8Array(256);
for (let b = D0; b <= D9; b++) IsDigit[b] = 1;

const IsNumeric = new Uint8Array(IsDigit);
IsNumeric[DOT] = IsNumeric[MINUS] = IsNumeric[PLUS] = 1;

// ---- Measurer -------------------------------------------------------

/**
 * Counting walker over a PDF byte buffer. It advances a cursor through
 * the grammar and bumps counters; no PDF object is materialised.
 * Call walk() once, then read the num* / max* / total* fields (or use
 * the measure() wrapper below).
 */
export class Measurer {
  /**
   * @param {Uint8Array} buf - the PDF bytes (a Node Buffer works too;
   *   processObjStm relies on `subarray`).
   */
  constructor(buf) {
    this.buf = buf;
    this.pos = 0;
    this._len = buf.length;

    this.numIndirectObjects = 0;
    this.numDicts = 0;
    this.numDictSlots = 0;
    this.numArrays = 0;
    this.numArraySlots = 0;
    this.numRefs = 0;
    this.numNames = 0;
    this.numNumbers = 0;
    this.numStrings = 0;
    this.numHexStrings = 0;
    this.numStreams = 0;
    this.numObjStms = 0;
    this.numObjStmInnerObjects = 0;
    this.maxDictSlots = 0;
    this.maxArraySlots = 0;
    this.maxRecursionDepth = 0;
    this.totalStreamBytes = 0;
    this.totalInflatedBytes = 0;

    // Depth-indexed capture stacks: slot d holds what the dict opened
    // at nesting depth d said about /Length, /Type /ObjStm, /N, /First.
    this._depth = 0;
    this._stLength = new Int32Array(MAX_DEPTH);
    this._stIsObjStm = new Uint8Array(MAX_DEPTH);
    this._stN = new Int32Array(MAX_DEPTH);
    this._stFirst = new Int32Array(MAX_DEPTH);

    // Reusable ObjStm header scratch; processObjStm grows both on demand.
    this._objNums = new Int32Array(512);
    this._objOffsets = new Int32Array(512);
  }

  // ---- Skip helpers (no allocation) --------------------------------

  // Advance past whitespace and `%` comments (a comment runs up to,
  // not including, the next LF or CR).
  skipWS() {
    const buf = this.buf, len = this._len;
    let p = this.pos;
    while (p < len) {
      const b = buf[p];
      if (IsWS[b]) { p++; continue; }
      if (b === PERCENT) {
        while (p < len && buf[p] !== LF && buf[p] !== CR) p++;
        continue;
      }
      break;
    }
    this.pos = p;
  }

  // Parse an integer in place. No string concat. Returns NaN if no digit.
  // Does NOT bump numNumbers (used for metadata: header, ObjStm offsets).
  _skipInt() {
    const buf = this.buf, len = this._len;
    let p = this.pos, v = 0, sign = 1, any = 0;
    if (buf[p] === MINUS) { sign = -1; p++; }
    else if (buf[p] === PLUS) { p++; }
    while (p < len) {
      const b = buf[p];
      if (b < D0 || b > D9) break;
      v = v * 10 + (b - D0);
      any = 1; p++;
    }
    this.pos = p;
    return any ? sign * v : NaN;
  }

  // Advance to the first whitespace or delimiter byte (or end of buffer).
  _skipNameBody() {
    const buf = this.buf, len = this._len;
    let p = this.pos;
    while (p < len) {
      const b = buf[p];
      if (IsWS[b] || IsDelim[b]) break;
      p++;
    }
    this.pos = p;
  }

  // Skip `/name` (cursor on the slash) and count it.
  skipName() {
    this.pos++;
    this._skipNameBody();
    this.numNames++;
  }

  // Skip a literal string `( ... )` (cursor on the opening paren),
  // honouring nested parens and backslash escapes, and count it.
  skipString() {
    this.pos++;
    const buf = this.buf, len = this._len;
    let p = this.pos, depth = 1;
    while (p < len && depth > 0) {
      const b = buf[p];
      // A backslash consumes the following byte, whatever it is.
      if (b === BACKSLASH) { p += 2; continue; }
      if (b === LP) depth++;
      else if (b === RP) depth--;
      p++;
    }
    this.pos = p;
    this.numStrings++;
  }

  // Skip a hex string `< ... >` (cursor on the `<`) and count it.
  skipHexString() {
    this.pos++;
    const buf = this.buf, len = this._len;
    let p = this.pos;
    while (p < len && buf[p] !== GT) p++;
    p++;
    this.pos = p;
    this.numHexStrings++;
  }

  // Skip /name; tag whether it matched a known stream-related key.
  // 0=other, 1=Length, 2=Type, 3=N, 4=First.
  // A key only matches when followed by whitespace, a delimiter or end
  // of buffer, so e.g. /Length1 is tagged 0.
  matchDictKey() {
    const buf = this.buf, len = this._len;
    this.pos++;
    const start = this.pos;
    let match = 0;
    const b0 = buf[start];
    if (b0 === L_CH) {
      if (start + 6 <= len &&
          buf[start+1] === e_ && buf[start+2] === n_ &&
          buf[start+3] === 103 /* g */ && buf[start+4] === t_ &&
          buf[start+5] === 104 /* h */ &&
          (start+6 === len || IsWS[buf[start+6]] || IsDelim[buf[start+6]])) {
        match = 1; this.pos = start + 6;
      }
    } else if (b0 === T_CH) {
      if (start + 4 <= len &&
          buf[start+1] === 121 /* y */ && buf[start+2] === 112 /* p */ &&
          buf[start+3] === e_ &&
          (start+4 === len || IsWS[buf[start+4]] || IsDelim[buf[start+4]])) {
        match = 2; this.pos = start + 4;
      }
    } else if (b0 === N_CH) {
      if (start + 1 === len || IsWS[buf[start+1]] || IsDelim[buf[start+1]]) {
        match = 3; this.pos = start + 1;
      }
    } else if (b0 === F_CH) {
      if (start + 5 <= len &&
          buf[start+1] === 105 /* i */ && buf[start+2] === r_ &&
          buf[start+3] === s_ && buf[start+4] === t_ &&
          (start+5 === len || IsWS[buf[start+5]] || IsDelim[buf[start+5]])) {
        match = 4; this.pos = start + 5;
      }
    }
    if (match === 0) this._skipNameBody();
    this.numNames++;
    return match;
  }

  // After / is already skipped, check if name body equals an ASCII string.
  // Does NOT move pos.
  _isNameAt(p, name) {
    const buf = this.buf, len = this._len;
    const n = name.length;
    if (p + n > len) return false;
    for (let i = 0; i < n; i++) {
      if (buf[p + i] !== name.charCodeAt(i)) return false;
    }
    if (p + n === len) return true;
    const after = buf[p + n];
    return !!(IsWS[after] || IsDelim[after]);
  }

  // ---- Number / Ref ------------------------------------------------

  // PDF number grammar: optional sign, optional digits, optional dot,
  // optional digits. At least one digit required somewhere. No exps.
  // Returns the integer value for pure-integer-non-ref case (for
  // /Length capture); else NaN.
  // Counts exactly one of numNumbers / numRefs. Throws if no digit is
  // present.
  parseNumberOrRefCapture() {
    const buf = this.buf, len = this._len;
    let p = this.pos;
    let sign = 1;
    if (buf[p] === MINUS) { sign = -1; p++; }
    else if (buf[p] === PLUS) { p++; }
    let intDigits = 0, intVal = 0;
    while (p < len && buf[p] >= D0 && buf[p] <= D9) {
      intVal = intVal * 10 + (buf[p] - D0);
      intDigits++; p++;
    }
    let hasDot = 0, fracDigits = 0;
    if (p < len && buf[p] === DOT) {
      hasDot = 1; p++;
      while (p < len && buf[p] >= D0 && buf[p] <= D9) { fracDigits++; p++; }
    }
    if (intDigits === 0 && fracDigits === 0) {
      throw new Error('measure-pass: expected number at ' + this.pos);
    }
    this.pos = p;
    if (hasDot) {
      this.numNumbers++;
      return NaN;
    }
    // Integer: look ahead for `<int> R`. If it is not a ref, rewind so
    // the second integer is parsed as its own object.
    const save = this.pos;
    this.skipWS();
    if (this.pos < len && IsDigit[buf[this.pos]]) {
      this._skipInt();
      this.skipWS();
      if (this.pos < len && buf[this.pos] === R_CH) {
        this.pos++;
        this.numRefs++;
        return NaN;
      }
    }
    this.pos = save;
    this.numNumbers++;
    return sign * intVal;
  }

  // ---- Object dispatch --------------------------------------------

  // Skip one object of any kind starting at the next non-whitespace
  // byte. Returns silently at end of buffer; throws on a byte that
  // cannot start an object.
  parseObject() {
    this.skipWS();
    const buf = this.buf, len = this._len;
    if (this.pos >= len) return;
    const b = buf[this.pos];

    // true / false / null are skipped without being counted.
    if (b === t_) {
      if (this.pos + 4 <= len &&
          buf[this.pos+1] === r_ && buf[this.pos+2] === u_ && buf[this.pos+3] === e_) {
        this.pos += 4; return;
      }
    } else if (b === f_) {
      if (this.pos + 5 <= len &&
          buf[this.pos+1] === a_ && buf[this.pos+2] === l_ &&
          buf[this.pos+3] === s_ && buf[this.pos+4] === e_) {
        this.pos += 5; return;
      }
    } else if (b === n_) {
      if (this.pos + 4 <= len &&
          buf[this.pos+1] === u_ && buf[this.pos+2] === l_ && buf[this.pos+3] === l_) {
        this.pos += 4; return;
      }
    }

    if (b === LT) {
      if (buf[this.pos + 1] === LT) {
        // parseDict leaves its frame pushed; nobody reads a nested
        // dict's captures, so pop it straight away.
        const d = this._depth;
        this.parseDict();
        this._depth = d;
        return;
      }
      this.skipHexString();
      return;
    }
    if (b === LP) { this.skipString(); return; }
    if (b === SLASH) { this.skipName(); return; }
    if (b === LB) { this.parseArray(); return; }
    if (IsNumeric[b]) { this.parseNumberOrRefCapture(); return; }

    throw new Error(`measure-pass: unexpected byte ${b} ('${String.fromCharCode(b)}') at ${this.pos}`);
  }

  // Parse << ... >>. Push frame on stack; do NOT decrement depth.
  // Caller reads stack frame at index this._depth - 1 and decrements.
  parseDict() {
    const d = this._depth++;
    if (d >= MAX_DEPTH) throw new Error('measure-pass: dict depth overflow at ' + this.pos);
    if (this._depth > this.maxRecursionDepth) this.maxRecursionDepth = this._depth;
    // -1 / 0 mean "key not seen in this dict".
    this._stLength[d] = -1;
    this._stIsObjStm[d] = 0;
    this._stN[d] = -1;
    this._stFirst[d] = -1;

    this.pos += 2;
    this.skipWS();

    const buf = this.buf, len = this._len;
    let count = 0;
    while (this.pos < len) {
      if (buf[this.pos] === GT && buf[this.pos + 1] === GT) break;
      if (buf[this.pos] !== SLASH) throw new Error('measure-pass: expected name at ' + this.pos);

      const tag = this.matchDictKey();
      this.skipWS();

      // Capture the value only when it has the directly-usable shape
      // (a plain integer, or a name for /Type); a ref yields NaN and
      // leaves the slot at its "not seen" value.
      if (tag === 1 && IsNumeric[buf[this.pos]]) {
        const v = this.parseNumberOrRefCapture();
        if (!isNaN(v)) this._stLength[d] = v;
      } else if (tag === 2 && buf[this.pos] === SLASH) {
        if (this._isNameAt(this.pos + 1, 'ObjStm')) this._stIsObjStm[d] = 1;
        this.pos++;
        this._skipNameBody();
        this.numNames++;
      } else if (tag === 3 && IsNumeric[buf[this.pos]]) {
        const v = this.parseNumberOrRefCapture();
        if (!isNaN(v)) this._stN[d] = v;
      } else if (tag === 4 && IsNumeric[buf[this.pos]]) {
        const v = this.parseNumberOrRefCapture();
        if (!isNaN(v)) this._stFirst[d] = v;
      } else {
        this.parseObject();
      }
      this.skipWS();
      count++;
    }
    this.pos += 2;

    // One slot for the key and one for the value.
    this.numDicts++;
    this.numDictSlots += count * 2;
    if (count * 2 > this.maxDictSlots) this.maxDictSlots = count * 2;
  }

  // Parse [ ... ] (cursor on the `[`), counting one slot per element.
  // Unlike parseDict this pops its own depth level before returning.
  parseArray() {
    const d = this._depth++;
    if (this._depth > this.maxRecursionDepth) this.maxRecursionDepth = this._depth;

    this.pos++;
    this.skipWS();

    const buf = this.buf, len = this._len;
    let count = 0;
    while (this.pos < len && buf[this.pos] !== RB) {
      this.parseObject();
      this.skipWS();
      count++;
    }
    this.pos++;

    this.numArrays++;
    this.numArraySlots += count;
    if (count > this.maxArraySlots) this.maxArraySlots = count;
    this._depth--;
  }

  // ---- Indirect object + stream handling --------------------------

  // Scan forward from `from` for the `endstream` keyword and return the
  // offset where the stream data ends, with trailing CR/LF bytes
  // trimmed. Throws if the keyword is not found.
  findEndStream(from) {
    const buf = this.buf, len = this._len;
    let p = from;
    while (p + 9 <= len) {
      if (buf[p] === e_ && buf[p+1] === n_ && buf[p+2] === d_ &&
          buf[p+3] === s_ && buf[p+4] === t_ && buf[p+5] === r_ &&
          buf[p+6] === e_ && buf[p+7] === a_ && buf[p+8] === m_) {
        let end = p;
        while (end > from && (buf[end-1] === LF || buf[end-1] === CR)) end--;
        return end;
      }
      p++;
    }
    throw new Error('measure-pass: endstream not found from ' + from);
  }

  // Inflate the object stream at buf[start, end) and count the N
  // objects packed inside it; `first` is the offset of the first
  // object within the inflated bytes. An inflate failure is logged and
  // the stream's contents are skipped. The walker is pointed at the
  // inflated bytes for the duration and always pointed back at the
  // outer buffer afterwards, even when an inner object throws.
  processObjStm(start, end, N, first) {
    const compressed = this.buf.subarray(start, end);
    let inflated;
    try {
      inflated = inflateSync(compressed);
    } catch (e) {
      console.warn(`measure-pass: inflate failed at ${start}: ${e.message}`);
      return;
    }
    this.totalInflatedBytes += inflated.length;
    this.numObjStmInnerObjects += N;

    if (N > this._objOffsets.length) {
      this._objOffsets = new Int32Array(N);
      this._objNums = new Int32Array(N);
    }

    const saveBuf = this.buf, savePos = this.pos, saveLen = this._len;
    const saveDepth = this._depth;
    this.buf = inflated;
    this.pos = 0;
    this._len = inflated.length;

    try {
      // Header: N pairs of `objectNumber offset`.
      for (let i = 0; i < N; i++) {
        this.skipWS();
        this._objNums[i] = this._skipInt();
        this.skipWS();
        this._objOffsets[i] = this._skipInt();
      }
      for (let i = 0; i < N; i++) {
        this.pos = first + this._objOffsets[i];
        const d0 = this._depth;
        this.parseObject();
        this._depth = d0;
      }
    } finally {
      // Without this a malformed inner object would leave the walker
      // pointing at the inflated scratch buffer.
      this.buf = saveBuf;
      this.pos = savePos;
      this._len = saveLen;
      this._depth = saveDepth;
    }
  }

  // Parse one `N G obj <object> [stream ... endstream] endobj`. Throws
  // if the `obj` keyword is missing; `endstream` and `endobj` are
  // consumed when present but not required.
  parseIndirectObject() {
    this.skipWS();
    this._skipInt();
    this.skipWS();
    this._skipInt();
    this.skipWS();

    const buf = this.buf, len = this._len;
    if (!(this.pos + 3 <= len && buf[this.pos] === o_ && buf[this.pos+1] === b_ && buf[this.pos+2] === j_)) {
      throw new Error('measure-pass: expected "obj" at ' + this.pos);
    }
    this.pos += 3;
    this.skipWS();
    this.numIndirectObjects++;

    // Call parseDict directly (not via parseObject) so its frame stays
    // on the stack for the stream handling below.
    const frameDepth = this._depth;
    let wasDict = false;
    if (this.pos + 2 <= len && buf[this.pos] === LT && buf[this.pos+1] === LT) {
      this.parseDict();
      wasDict = true;
    } else {
      this.parseObject();
    }
    this.skipWS();

    if (wasDict && this.pos + 6 <= len &&
        buf[this.pos] === s_ && buf[this.pos+1] === t_ && buf[this.pos+2] === r_ &&
        buf[this.pos+3] === e_ && buf[this.pos+4] === a_ && buf[this.pos+5] === m_) {
      this.pos += 6;
      if (this.pos < len && buf[this.pos] === CR) this.pos++;
      if (this.pos < len && buf[this.pos] === LF) this.pos++;

      const streamStart = this.pos;
      const length = this._stLength[frameDepth];
      const isObjStm = this._stIsObjStm[frameDepth];
      const N = this._stN[frameDepth];
      const first = this._stFirst[frameDepth];

      // Trust a direct /Length only if the byte right after the data
      // looks like the run-up to `endstream`; otherwise search for it.
      let streamEnd;
      if (length > 0) {
        streamEnd = streamStart + length;
        if (streamEnd > len ||
            !(buf[streamEnd] === LF || buf[streamEnd] === CR ||
              buf[streamEnd] === e_ || IsWS[buf[streamEnd]])) {
          streamEnd = this.findEndStream(streamStart);
        }
      } else {
        streamEnd = this.findEndStream(streamStart);
      }
      this.pos = streamEnd;
      this.totalStreamBytes += (streamEnd - streamStart);
      this.numStreams++;

      if (isObjStm && N > 0 && first > 0) {
        this.numObjStms++;
        this.processObjStm(streamStart, streamEnd, N, first);
        this.pos = streamEnd;
      }

      this.skipWS();
      if (this.pos + 9 <= len &&
          buf[this.pos] === e_ && buf[this.pos+1] === n_ && buf[this.pos+2] === d_ &&
          buf[this.pos+3] === s_ && buf[this.pos+4] === t_ && buf[this.pos+5] === r_ &&
          buf[this.pos+6] === e_ && buf[this.pos+7] === a_ && buf[this.pos+8] === m_) {
        this.pos += 9;
      }
      this.skipWS();
    }

    // Pop the frame parseDict left pushed.
    if (wasDict) this._depth = frameDepth;

    this.skipWS();
    if (this.pos + 6 <= len &&
        buf[this.pos] === e_ && buf[this.pos+1] === n_ && buf[this.pos+2] === d_ &&
        buf[this.pos+3] === o_ && buf[this.pos+4] === b_ && buf[this.pos+5] === j_) {
      this.pos += 6;
    }
  }

  // Move the cursor to the next `N G obj` header at or after this.pos,
  // stepping over anything else (file header, xref tables, trailers,
  // startxref, junk). Returns true with the cursor on the header's
  // first digit, or false once the buffer is exhausted.
  _seekIndirectObjectHeader() {
    const buf = this.buf, len = this._len;
    while (this.pos < len) {
      this.skipWS();
      if (this.pos >= len) break;
      if (!IsDigit[buf[this.pos]]) { this.pos++; continue; }
      const save = this.pos;
      this._skipInt();
      if (buf[this.pos] === SP || buf[this.pos] === TAB) {
        this.skipWS();
        if (IsDigit[buf[this.pos]]) {
          this._skipInt();
          this.skipWS();
          if (this.pos + 3 <= len && buf[this.pos] === o_ &&
              buf[this.pos+1] === b_ && buf[this.pos+2] === j_) {
            this.pos = save;
            return true;
          }
        }
      }
      // Not a header: retry one byte further on.
      this.pos = save + 1;
    }
    return false;
  }

  /**
   * Measure every indirect object in the buffer. Bytes between objects
   * (xref sections, trailers, `startxref`) are stepped over, so objects
   * in later sections of a multi-section file are counted as well as
   * those in the first.
   */
  walk() {
    // Each successful seek lands on a verified header, so
    // parseIndirectObject always consumes at least that header and the
    // loop makes progress.
    while (this._seekIndirectObjectHeader()) {
      this.parseIndirectObject();
    }
  }
}

// ---- Convenience wrapper -------------------------------------------

/**
 * Run a Measurer over `bytes` and return its counters as a plain
 * object.
 *
 * @param {Uint8Array} bytes - the PDF file contents
 * @returns {object} counts of appearances (not unique values), plus the
 *   max* and total* figures
 */
export function measure(bytes) {
  const m = new Measurer(bytes);
  m.walk();
  return {
    indirectObjects: m.numIndirectObjects,
    dicts: m.numDicts,
    dictSlots: m.numDictSlots,
    arrays: m.numArrays,
    arraySlots: m.numArraySlots,
    refs: m.numRefs,
    names: m.numNames,
    numbers: m.numNumbers,
    strings: m.numStrings,
    hexStrings: m.numHexStrings,
    streams: m.numStreams,
    objStms: m.numObjStms,
    objStmInner: m.numObjStmInnerObjects,
    maxDictSlots: m.maxDictSlots,
    maxArraySlots: m.maxArraySlots,
    maxRecursion: m.maxRecursionDepth,
    totalStreamBytes: m.totalStreamBytes,
    totalInflatedBytes: m.totalInflatedBytes,
  };
}
+// +// Why: pdf-lib's PDFStreamWriter.computeBufferSize creates one +// PDFObjectStream per chunk, then immediately calls +// computeIndirectObjectSize on each. sizeInBytes() reads the stream's +// contents cache, which is lazily populated by deflating the unencoded +// contents. The whole pass is synchronous, so the per-chunk deflate work +// runs serially; it accounted for ~30 % of save() wall time on the book +// before this change. +// +// What: same construction logic as PDFStreamWriter, split into three +// phases: +// 1. classify uncompressed vs compressed (same as upstream) +// 2. instantiate every PDFObjectStream up-front, then `await +// Promise.all` an async node:zlib.deflate per stream so libuv's +// thread pool (default 4) runs them concurrently +// 3. size + emit (same as upstream, but every cache.access() is a hit) +// The xrefStream is one more PDFFlateStream whose contents depend on +// the offsets computed in phase 3; we pre-deflate it once via +// node:zlib.deflateSync right after those offsets are pinned, so even +// that final stream never falls back to pdf-lib's pure-JS deflate. +// +// Output: nearly byte-identical to pdfDoc.save({ useObjectStreams: true }). +// node:zlib's match choices in the LZ77 inner loop may differ from +// pdf-lib's default deflate library, so compressed stream contents can +// differ at the byte level, with correspondingly different /Length +// values; the difference is invisible to PDF viewers. +// +// Parallelism is bounded by UV_THREADPOOL_SIZE (default 4). Bump it via +// `process.env.UV_THREADPOOL_SIZE = '8'` before any libuv work fires +// if you want more concurrency.
+
+import { deflate, deflateSync } from 'node:zlib';
+import { promisify } from 'node:util';
+import {
+  PDFStreamWriter,
+  PDFObjectStream,
+  PDFCrossRefStream,
+  PDFRef,
+  PDFName,
+  PDFNumber,
+  PDFInvalidObject,
+  PDFStream,
+  PDFHeader,
+  PDFTrailer,
+} from 'pdf-lib';
+
+const deflateAsync = promisify(deflate);
+
+/**
+ * PDFStreamWriter subclass that lays the document out the same way as
+ * its parent but pre-fills each object stream's contents cache with
+ * node:zlib output, deflating all object streams concurrently.
+ */
+class ParallelStreamWriter extends PDFStreamWriter {
+  /**
+   * @param context           PDFContext whose indirect objects are written.
+   * @param encodeStreams     Whether object / xref streams are deflated.
+   * @param objectsPerStream  Maximum objects packed into one object stream.
+   * @param parallel          When truthy (and encodeStreams is on), deflate
+   *                          the object streams via async node:zlib.
+   */
+  constructor(context, encodeStreams, objectsPerStream, parallel) {
+    // The parent's second constructor argument is objectsPerTick, the
+    // yield knob behind shouldWaitForTick. fast-sync-load.mjs removes
+    // every caller of shouldWaitForTick on the parser and writer sides,
+    // so the value is vestigial; Infinity just makes that explicit.
+    super(context, Infinity, encodeStreams, objectsPerStream);
+    this._parallel = parallel;
+    // Number of object streams pre-deflated by the most recent
+    // computeBufferSize() call; read by parallelSave().
+    this._lastPrecompressed = 0;
+  }
+
+  /**
+   * Lay out the file: assign a byte offset to every object, build the
+   * object streams and the cross-reference stream, and total the size.
+   *
+   * @returns {Promise<{size: number, header: object, indirectObjects: Array, trailer: object}>}
+   *   `indirectObjects` lists, in emission order, the [ref, object] pairs
+   *   written at top level: the uncompressed objects, then the object
+   *   streams, then the xref stream.
+   */
+  async computeBufferSize() {
+    const { context, encodeStreams, objectsPerStream } = this;
+
+    let objectNumber = context.largestObjectNumber + 1;
+    const header = PDFHeader.forVersion(1, 7);
+    let size = header.sizeInBytes() + 2;
+    const xrefStream = PDFCrossRefStream.create(
+      this.createTrailerDict(),
+      encodeStreams,
+    );
+
+    const topLevelObjects = [];
+    const compressedChunks = [];
+    const objectStreamRefs = [];
+
+    // ----- Phase 1: classify -----
+    // Streams, invalid objects, the Encrypt dictionary and anything with
+    // a non-zero generation number are written at top level; everything
+    // else is packed into chunks of `objectsPerStream` entries.
+    const encryptRef = context.trailerInfo.Encrypt;
+    let openChunk = null;
+    let openChunkRef = null;
+    for (const entry of context.enumerateIndirectObjects()) {
+      const [ref, object] = entry;
+      const staysUncompressed =
+        ref === encryptRef ||
+        object instanceof PDFStream ||
+        object instanceof PDFInvalidObject ||
+        ref.generationNumber !== 0;
+
+      if (staysUncompressed) {
+        topLevelObjects.push(entry);
+        xrefStream.addUncompressedEntry(ref, size);
+        size += this.computeIndirectObjectSize(entry);
+        continue;
+      }
+
+      // Open a new chunk on the first compressible object and whenever
+      // the current chunk is full.
+      if (openChunk === null || openChunk.length % objectsPerStream === 0) {
+        openChunk = [];
+        compressedChunks.push(openChunk);
+        openChunkRef = PDFRef.of(objectNumber++);
+        objectStreamRefs.push(openChunkRef);
+      }
+      xrefStream.addCompressedEntry(ref, openChunkRef, openChunk.length);
+      openChunk.push(entry);
+    }
+
+    // ----- Phase 2: instantiate object streams and parallel-deflate -----
+    const objectStreams = compressedChunks.map((chunk) =>
+      PDFObjectStream.withContextAndObjects(context, chunk, encodeStreams),
+    );
+
+    const precompress =
+      this._parallel && encodeStreams && objectStreams.length > 0;
+    if (precompress) {
+      // Each deflate is handed to libuv as soon as that stream's buffer
+      // is built, so compressing stream N overlaps with building the
+      // buffers of the streams after it rather than waiting for every
+      // build to finish (the original note measured ~30 ms saved on the
+      // book).
+      const pending = objectStreams.map((stream) =>
+        deflateAsync(stream.getUnencodedContents()),
+      );
+      const deflated = await Promise.all(pending);
+      deflated.forEach((bytes, index) => {
+        objectStreams[index].contentsCache.value = bytes;
+      });
+    }
+    this._lastPrecompressed = precompress ? objectStreams.length : 0;
+
+    // ----- Phase 3: size object streams -----
+    // After a phase-2 precompress these size computations hit the
+    // pre-filled contents cache.
+    for (const [index, objectStream] of objectStreams.entries()) {
+      const entry = [objectStreamRefs[index], objectStream];
+      xrefStream.addUncompressedEntry(entry[0], size);
+      size += this.computeIndirectObjectSize(entry);
+      topLevelObjects.push(entry);
+    }
+
+    // ----- xrefStream wrap-up -----
+    // The xref stream's contents depend on the offsets assigned above,
+    // so it can only be finalised now. It is a single stream: deflate it
+    // synchronously with node:zlib and pre-fill its cache, otherwise
+    // pdf-lib's lazy populate would run its own deflate library on the
+    // main thread during the size computation below.
+    const xrefStreamRef = PDFRef.of(objectNumber);
+    const totalObjects = objectNumber + 1;
+    const xrefOffset = size;
+    xrefStream.dict.set(PDFName.of('Size'), PDFNumber.of(totalObjects));
+    xrefStream.addUncompressedEntry(xrefStreamRef, xrefOffset);
+    if (encodeStreams) {
+      xrefStream.contentsCache.value = deflateSync(
+        xrefStream.getUnencodedContents(),
+      );
+    }
+    const xrefEntry = [xrefStreamRef, xrefStream];
+    size += this.computeIndirectObjectSize(xrefEntry);
+    topLevelObjects.push(xrefEntry);
+
+    const trailer = PDFTrailer.forLastCrossRefSectionOffset(xrefOffset);
+    size += trailer.sizeInBytes();
+
+    return { size, header, indirectObjects: topLevelObjects, trailer };
+  }
+}
+
+/**
+ * Replacement for `pdfDoc.save({ useObjectStreams: true })` that deflates
+ * the object streams in parallel. Runs the same pre-serialize steps as
+ * PDFDocument.save (addDefaultPage, updateFieldAppearances, flush) and
+ * then serializes through ParallelStreamWriter.
+ *
+ * @param pdfDoc  The PDFDocument to serialize.
+ * @param {object} [options]
+ * @param {boolean} [options.addDefaultPage=true]  Add a page first when the
+ *   document has none.
+ * @param {boolean} [options.updateFieldAppearances=true]  Refresh form
+ *   field appearances when the document has a cached form.
+ * @param {number} [options.objectsPerStream=50]  Objects per object stream.
+ * @param {boolean} [options.encodeStreams=true]  Deflate object / xref
+ *   streams.
+ * @param {boolean} [options.parallel=true]  Deflate object streams
+ *   concurrently via async node:zlib.
+ * @returns {Promise<{bytes: Uint8Array, streamCount: number}>}
+ *   `streamCount` is the number of object streams that were pre-deflated
+ *   (0 when `parallel` or `encodeStreams` is off).
+ */
+export async function parallelSave(pdfDoc, options = {}) {
+  const {
+    addDefaultPage = true,
+    updateFieldAppearances = true,
+    objectsPerStream = 50,
+    encodeStreams = true,
+    parallel = true,
+  } = options;
+
+  const needsDefaultPage = addDefaultPage && pdfDoc.getPageCount() === 0;
+  if (needsDefaultPage) pdfDoc.addPage();
+
+  const form = updateFieldAppearances
+    ? pdfDoc.formCache.getValue()
+    : undefined;
+  if (form) form.updateFieldAppearances();
+
+  await pdfDoc.flush();
+
+  const writer = new ParallelStreamWriter(
+    pdfDoc.context,
+    encodeStreams,
+    objectsPerStream,
+    parallel,
+  );
+  const serialized = await writer.serializeToBuffer();
+  return { bytes: serialized, streamCount: writer._lastPrecompressed };
+}
diff --git a/docs/render-book.mjs b/docs/render-book.mjs index e7ad9bf..08117d5 100644 --- a/docs/render-book.mjs +++ b/docs/render-book.mjs @@ -31,9 +31,171 @@ import { pathToFileURL, fileURLToPath } from 'node:url'; import { dirname, resolve } from 'node:path'; import { writeFileSync, existsSync } from 'node:fs'; import puppeteer from 'puppeteer'; -import { PDFDocument, ParseSpeeds } from 'pdf-lib';
+import { PDFDocument } from 'pdf-lib'; +// Side-effecting imports. Mutate pdf-lib's live module exports +// before any pdf-lib operation -- order doesn't matter. See +// perf/notes/08-pdf-lib.md. +// +// fast-refs-class -- dense-array cache in front of PDFRef.of for +// the gen=0 case (82 % of ~1.2 M calls per load) PLUS a +// class-constructor shape for the PDFRef instance, AND drops +// the per-instance `tag` string (toString / sizeInBytes / +// copyBytesInto compute from objectNumber / generationNumber +// directly via _writeUint + _digitCount helpers). Replaces the +// `Object.create(PDFRef.prototype) + property writes` pattern of +// the older fast-refs.mjs shim, which V8 routes through the +// slow-property path: PDFRef ended up at ~60 B/instance vs +// PDFName's ~31 B (`new PDFName(...)`-built). The constructor +// gives V8 a stable hidden class from the first instance and +// drops per-instance cost to ~44 B. On the book (226 k unique +// PDFRefs) the combined effect is ~3.87 MB heap (-8.5 % of +// total process-phase allocation) and ~140 ms wall-clock (-12 % +// of process) on top of the tag-drop refinement that already +// trimmed parseIndirectObjectHeader by ~4.3 MB. Same prototype +// methods, same instanceof semantics; the only change is the +// construction style. See "fast-refs-class" in +// perf/notes/08-pdf-lib.md. fast-refs.mjs stays in the tree as +// an A/B baseline (mutex-checked in measure.mjs). +// fast-inflate -- swaps pako.inflate for node:zlib.inflateSync +// on the one pdf-lib call site that uses it +// (PDFCrossRefStreamParser during load). Negligible cost shift, +// but eliminates the last pdf-lib -> pako call at runtime. +// fast-parse-number -- direct-integer accumulators in front of +// BaseParser.parseRawNumber + parseRawInt, skipping per-byte +// string concat and the trailing Number() round-trip. Touches +// every numeric token parsed during PDFDocument.load. 
+// fast-decode-name -- cache in front of PDFName.of that skips +// the decodeName regex scan when the input has no `#` (which +// is 99.999 % of the ~2.8 M PDFName.of calls per load). +// fast-number-to-string -- short-circuit numberToString when +// `String(num)` already lacks an `e` (i.e. for every PDF number +// that's not in the exponential-notation tail). Skips a +// redundant toString + split + parseInt per call. +// fast-size-in-bytes -- replace utils.sizeInBytes (which allocates +// `n.toString(2)` just to count its bit length) with a non- +// allocating short-circuit ladder. Called ~300 k times per save +// from PDFCrossRefStream's xref writer. +// fast-dict-onebuf -- one long-lived buffer for every committed +// PDFDict entry across the whole document. Parser uses a small +// per-instance temp array as a stack of recursion frames; each +// parseDict invocation appends to temp, commits its frame to +// main in one contiguous append, and pops temp back. PDFDicts +// only ever read from main, so a packed (start, length, owned) +// Number is the whole instance state -- no separate bufIdx. +// Owned dicts (factory-created post-parse) also append to main. +// Mutations: in-place replace for existing keys, COW (copy +// range to tail, push new pair) for new keys or delete. +// PDFContext is a singleton -- one PDFDocument.load per +// process; a second distinct context throws. Subsumes +// fast-dict-array. Process-phase heap traffic drops from the +// Map-backed baseline of ~152 MB down to ~66 MB (-57%); -22% +// beyond fast-dict-array. See "One-buffer PDFDict" in +// perf/notes/08-pdf-lib.md. +// +// Earlier dict-shape shims (fast-dict-array, fast-dict-iter, +// fast-parse-dict) stay in the tree as A/B baselines but are +// mutually exclusive with --fast-dict-onebuf in measure.mjs. 
+// fast-parse-object -- replace PDFObjectParser.prototype.parseObject +// with a first-byte-dispatch version that gates the three +// matchKeyword (true / false / null) scans behind a byte check. +// parseObject fires per dict value / array element / indirect +// object body; the upstream version pays three speculative +// matchKeyword fail-and-rewind costs on every invocation. Same +// semantics, dispatch reordered by observed frequency. +// fast-parse-name -- byte-keyed cache in front of +// PDFObjectParser.parseName. Upstream builds the name body via +// `name += charFromCode(byte)` per byte then hands the result +// to PDFName.of (fast-decode-name's string-keyed Map). 99.7 % of +// the 1.68 M calls per load on the book are cache hits -- the +// same ~5 k unique names show up over and over (Type, Length, +// Pages, MediaBox, ...) -- so the per-call string build + hash +// is pure overhead on the hot path. The shim scans bytes with +// direct buffer access, accumulates a small Smi hash, and +// looks up a `Map` keyed by byte content. On +// hit (~99.7 %) it returns the PDFName with zero string +// allocation; on miss it builds the string in one shot via +// String.fromCharCode and routes through the upstream +// PDFName.of (which is fast-decode-name's cache on this stack) +// so both caches converge on the same PDFName instance. ~80 ms +// of process wall-clock saved (-9 %) on the book, mostly on +// load (0.41 s -> 0.33 s). +1.3 MB long-lived heap for the +// cache itself, a small price for the load-time reduction. +// fast-sync-load -- rip the parseSpeed / objectsPerTick / +// shouldWaitForTick / waitForTick machinery out of both pdf-lib's +// load path (PDFDocument.load + five PDFParser / +// PDFObjectStreamParser methods underneath it) and its save path +// (PDFWriter.serializeToBuffer + computeBufferSize, plus the +// unreachable PDFStreamWriter.computeBufferSize patched for +// consistency). 
Each upstream method is wrapped in __awaiter so +// on browsers it can yield to the event loop every objectsPerTick +// objects; in Node the gate never fires but every indirect object +// still paid for the generator state machine + Promise +// allocation. ~135 ms of attributed parser self-time + ~40 ms +// writer + an unknowable chunk of the GC row removed; the +// parseSpeed / objectsPerTick options drop off all our call sites +// in step with this shim. +// fast-indirect-objects -- replace PDFContext.indirectObjects +// (Map) with a dense array indexed by +// objectNumber for the gen=0 path. After fast-dict-array shipped, +// PDFContext.assign's `this.indirectObjects.set(ref, object)` was +// the only hot Map.set left in the heap profile (~7 MB of set +// traffic from the parser's once-per-indirect-object assign). +// Mirror of the fast-refs trick on the value side: dense array +// for gen=0, Map fallback for gen!=0. enumerateIndirectObjects +// skips its sort when the gen!=0 Map is empty (the common case). +// Drops PDFContext.assign out of the CPU top-15 and halves the +// remaining set heap traffic. +// fast-pdfnumber-pool -- value-keyed cache in front of PDFNumber.of. +// Dense array for non-negative integers in [0, 16384), Map +// fallback for floats / negatives / out-of-range. PDFs reuse the +// same numeric values (page indices, /Count, /N, /MediaBox +// dimensions) hundreds of thousands of times against only a few +// thousand unique values; pooling collapses parseNumberOrRef's +// ~15 MB of PDFNumber allocations to ~0.8 MB. Total process-phase +// heap traffic drops ~13 % (123 MB -> 107 MB). PDFNumber is +// immutable so sharing is safe. +// measure-pass (Phase 1) -- no-allocate byte walker +// (docs/lib/measure-pass.mjs) that runs in front of +// PDFDocument.load on the raw Chrome PDF and counts dictSlots +// + arraySlots. 
The counts drive setExpectedDictSlots() on +// fast-dict-onebuf and setExpectedArraySlots() on +// fast-array-onebuf, pre-sizing each shim's backing Array to +// the exact measured demand (no V8 growth resizes during load). +// Net wall-clock is ~+40 ms on the book (walker costs ~60 ms; +// load saves ~20). The bound on mainBuf isn't material on its +// own (~60 K slots out of 2.4 M) but commits the two-pass +// shape. Phase 2/3/3β (Float64Array mainBuf + encoded slots) +// were explored and didn't ship -- per-slot encode/decode cost +// exceeded the mark-phase savings. See "Phase 1: pre-size +// mainBuf via measure-pass" in perf/notes/08-pdf-lib.md. +// fast-array-onebuf -- same range-view pattern as fast-dict-onebuf +// applied to PDFArray. Each PDFArray's per-instance +// `this.array = []` goes away; instances become views into a +// shared arrayMain (plain JS Array, heterogeneous slots holding +// the original PDFObject references). Reads are direct -- no +// decode, unlike the explored-but-didn't-ship encoded approach +// which encoded slots into a Float64Array and paid ~300 ms of +// decodeValue dispatch during save. ~19 MB of process-phase +// heap traffic from parseArray collapses (the `this.array` +// allocation + grow doublings across ~79 k PDFArrays). See +// "One-buffer PDFArray" in perf/notes/08-pdf-lib.md. 
+import './lib/fast-refs-class.mjs'; +import './lib/fast-inflate.mjs'; +import './lib/fast-parse-number.mjs'; +import './lib/fast-decode-name.mjs'; +import './lib/fast-number-to-string.mjs'; +import './lib/fast-size-in-bytes.mjs'; +import { setExpectedDictSlots } from './lib/fast-dict-onebuf.mjs'; +import { setExpectedArraySlots } from './lib/fast-array-onebuf.mjs'; +import './lib/fast-parse-object.mjs'; +import './lib/fast-parse-name.mjs'; +import './lib/fast-sync-load.mjs'; +import './lib/fast-indirect-objects.mjs'; +import './lib/fast-pdfnumber-pool.mjs'; +import { measure as measureRawPdf } from './lib/measure-pass.mjs'; import { parseOutline, setOutline } from './lib/outline.mjs'; import { setMetadata } from './lib/postprocesser.mjs'; +import { parallelSave } from './lib/parallel-deflate.mjs'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -245,15 +407,33 @@ try { console.log(`generate: ${fmtMs(Date.now() - tGenerate)} (raw ${(rawPdf.length / 1024 / 1024).toFixed(1)} MB)`); // Process -- pdf-lib roundtrip with outline + metadata attached. - // parseSpeed: Fastest and objectsPerTick: Infinity are critical: - // pdf-lib's defaults yield to the event loop between every 100/50 - // objects, turning a ~5 s round-trip into ~40 s on a 50 MB PDF - // (~35 s of which is pure V8 idle). See perf/README.md. + // fast-sync-load strips the waitForTick yield gates on both load + // and save sides entirely (load was ~40 s under pdf-lib's Slow + // default that yields every 100 objects; ~5 s on Fastest; now + // ~1 s with the gates ripped out -- so parseSpeed / objectsPerTick + // no longer matter and drop from the call sites). + // + // parallelSave (vs the default pdfDoc.save): + // - objectsPerStream: 500 -- larger object-stream chunks compress + // better (shared deflate window), 5 % smaller output PDF, and + // cuts the per-chunk dispatch overhead 10x. 
+ // - dispatches every chunk's deflate to libuv's thread pool via + // async zlib.deflate instead of running serially on the main + // thread. Moves ~300 ms of zlib work off-CPU on the book. + // + // measureRawPdf walks rawPdf once with no allocations and hands + // the exact dictSlot + arraySlot counts to fast-dict-onebuf / + // fast-array-onebuf so each shim's backing Array is pre-sized; + // eliminates V8 growth resizes during load. + // See perf/notes/08-pdf-lib.md. const tProcess = Date.now(); - const pdfDoc = await PDFDocument.load(rawPdf, { parseSpeed: ParseSpeeds.Fastest }); + const counts = measureRawPdf(rawPdf); + setExpectedDictSlots(counts.dictSlots); + setExpectedArraySlots(counts.arraySlots); + const pdfDoc = await PDFDocument.load(rawPdf); setMetadata(pdfDoc, meta); await setOutline(pdfDoc, outline, false); - const finalPdf = await pdfDoc.save({ objectsPerTick: Infinity }); + const { bytes: finalPdf } = await parallelSave(pdfDoc, { objectsPerStream: 500 }); console.log(`process: ${fmtMs(Date.now() - tProcess)}`); writeFileSync(outputPath, Buffer.from(finalPdf)); diff --git a/package-lock.json b/package-lock.json index 4d32d0a..3e17b77 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,8 +9,8 @@ "version": "0.0.0", "devDependencies": { "html-entities": "^2.6.0", - "pdf-lib": "^1.17.1", - "puppeteer": "^25.0.4" + "pdf-lib": "1.17.1", + "puppeteer": "25.0.4" } }, "node_modules/@babel/code-frame": { diff --git a/package.json b/package.json index ba6093a..3dce871 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "description": "PDF book pipeline and profiling harness for the twinBASIC documentation", "devDependencies": { "html-entities": "^2.6.0", - "pdf-lib": "^1.17.1", - "puppeteer": "^25.0.4" + "pdf-lib": "1.17.1", + "puppeteer": "25.0.4" } } diff --git a/perf/.gitignore b/perf/.gitignore index df01c96..001fa9e 100644 --- a/perf/.gitignore +++ b/perf/.gitignore @@ -1,3 +1,4 @@ results/ ab-css/ ab-css-*/ +raw.pdf diff --git 
a/perf/README.md b/perf/README.md index 81e67b6..f3637cc 100644 --- a/perf/README.md +++ b/perf/README.md @@ -61,6 +61,361 @@ Drop `--render-only` whenever you need to also measure generate / process (e.g. confirming a fix doesn't shift cost into `page.pdf()` or pdf-lib), or to write `book.pdf` for behavioural verification. +## Profiling pdf-lib (process phase): canonical command + +The mirror command for CPU-profiling the pdf-lib roundtrip (run from +`perf/`): + +``` +node measure.mjs --fast-refs-class --parallel-deflate --fast-decode-name --fast-number-to-string --fast-size-in-bytes --fast-inflate --fast-parse-number --fast-dict-onebuf --fast-array-onebuf --measure-pass --fast-parse-object --fast-parse-name --fast-sync-load --fast-indirect-objects --fast-pdfnumber-pool --cpu-profile-process --cpu-sampling 100 --out results/