|
| 1 | +// One-buffer PDFArray: every committed element lives in a single |
| 2 | +// append-only JS Array (arrayMain), kept for the document's lifetime. |
| 3 | +// Mirror of fast-dict-onebuf's strategy applied to PDFArray. Backing |
| 4 | +// is a plain heterogeneous JS Array -- slots hold the original |
| 5 | +// PDFObject references directly. No encoding, no decode on read; the |
| 6 | +// hot path is `arrayMain[start + i]`. |
| 7 | +// |
| 8 | +// Phase 3 of fast-dict-encoded did the same range-view refactor on |
| 9 | +// PDFArray but used a Float64Array + encoded slots (mirroring its |
| 10 | +// dict shape). The encoded backing cost ~300 ms of decodeValue |
| 11 | +// dispatch during save (PDFArray.copyBytesInto iterates ~500 k |
| 12 | +// elements). This shim keeps the heap win (~19 MB on the book by |
| 13 | +// removing each PDFArray's per-instance `this.array = []`) without |
| 14 | +// paying the decode cost: slots are JS references, reads are direct. |
| 15 | +// |
| 16 | +// 40-bit packed Number layout (well within Number.MAX_SAFE_INTEGER): |
| 17 | +// bits 0-23: start (24 bits, max 16 M slots in arrayMain) |
| 18 | +// bits 24-39: length (16 bits, max 65 535 elements; max observed |
| 19 | +// ~25 k on the book) |
| 20 | +// bits 40-52: spare (13 bits) |
| 21 | +// |
| 22 | +// Recursion. parseArray pushes elements onto a per-parser _arrayTemp; |
| 23 | +// inner parseArray invocations append on top, commit their frame to |
| 24 | +// arrayMain in one append, and pop temp back. Inner / outer ranges |
| 25 | +// in arrayMain do not overlap. _arrayTemp is independent of |
| 26 | +// fast-dict-onebuf's _dictTemp so dict <-> array recursion is fine. |
| 27 | +// |
| 28 | +// Mutations: |
| 29 | +// - set(i, v): in-place replace (safe; no length change) |
| 30 | +// - push(v) at HWM: in-place extend (no other arrays follow) |
| 31 | +// - push(v) not at HWM: COW the range to tail, then push |
| 32 | +// - insert / remove: always COW (shifts would corrupt neighbours) |
| 33 | +// Same at-HWM-determines-safety logic as fast-dict-onebuf; no owned |
| 34 | +// bit needed (see fast-dict-onebuf commit 7e8b1f7). |
| 35 | +// |
| 36 | +// Singleton PDFContext (one PDFDocument.load per process in our |
| 37 | +// pipeline). The singleton is duplicated rather than shared with |
| 38 | +// fast-dict-onebuf -- the mechanism is ten lines and keeping each |
| 39 | +// shim independently injectable is worth more than dedup'ing it. |
| 40 | +// Both shims end up holding references to the same PDFContext. |
| 41 | +// |
| 42 | +// Composes with --fast-dict-onebuf. Mutually exclusive with |
| 43 | +// --fast-dict-encoded (which subsumes both via its own encoded shape). |
| 44 | + |
| 45 | +import { createRequire } from 'node:module'; |
| 46 | + |
| 47 | +const require = createRequire(import.meta.url); |
| 48 | +const PDFArray = require('pdf-lib/cjs/core/objects/PDFArray.js').default; |
| 49 | +const PDFObjectParser = require('pdf-lib/cjs/core/parser/PDFObjectParser.js').default; |
| 50 | +const CharCodes = require('pdf-lib/cjs/core/syntax/CharCodes.js').default; |
| 51 | + |
| 52 | +// ---- The single buffer --------------------------------------------- |
| 53 | + |
// Starting capacity of the shared buffer: total array slots plus slack
// as measured on the book. Other workloads simply grow past it. When
// the measure-pass shim runs first it calls setExpectedArraySlots()
// before parsing, which sets `arrayMain.length` to the measured demand.
const ARRAY_MAIN_INITIAL_CAP = 800000;

// The one append-only store that every PDFArray view indexes into.
export const arrayMain = new Array(ARRAY_MAIN_INITIAL_CAP);

// Number of slots committed so far, i.e. the high-water mark.
let arrayMainLen = 0;

// Read-only accessor for the high-water mark.
export function getArrayMainLen() {
  return arrayMainLen;
}
| 65 | + |
// Resize arrayMain in place to `ceil(slots * slack)` slots. Must be
// called before any element has been committed (arrayMainLen === 0).
// `slack` is a multiplier on `slots`; default 1.0 (exact).
//
// The array is resized through its `length` rather than by rebinding
// the module-level variable: per the fast-dict-onebuf rationale,
// reassigning the binding invalidates V8's inline caches in every
// closure that reads it, which shows up as a parse-time spike.
//
// Throws Error when parsing has already started, and RangeError when
// the requested size is not a finite, non-negative number (previously
// such input only surfaced as the engine's generic "Invalid array
// length" error).
export function setExpectedArraySlots(slots, slack = 1.0) {
  if (arrayMainLen > 0) {
    throw new Error(
      `fast-array-onebuf: setExpectedArraySlots called after parse started (arrayMainLen=${arrayMainLen})`,
    );
  }
  const capacity = Math.ceil(slots * slack);
  if (!Number.isFinite(capacity) || capacity < 0) {
    throw new RangeError(
      `fast-array-onebuf: setExpectedArraySlots needs a finite, non-negative size (slots=${slots}, slack=${slack})`,
    );
  }
  arrayMain.length = capacity;
}
| 81 | + |
| 82 | +// ---- Bit-packing helpers ------------------------------------------- |
| 83 | + |
const POW_24 = 16777216; // 2^24
const MASK_24 = 0xFFFFFF;
const MASK_16 = 0xFFFF;

const MAX_START = POW_24; // exclusive upper bound for start
const MAX_LENGTH = 1 << 16; // 65 536, exclusive upper bound for length

// Pack a (start, length) range into one Number:
//   low 24 bits = start, next 16 bits = length.
// Both values must be non-negative integers inside their budgets.
// Negative, fractional or NaN inputs are rejected because they would
// otherwise slip past the upper-bound checks and decode to a different
// range (e.g. length -1 decodes as 65 535).
function pack(start, length) {
  if (!Number.isInteger(start) || start < 0) {
    throw new Error(`fast-array-onebuf: start ${start} is not a non-negative integer`);
  }
  if (!Number.isInteger(length) || length < 0) {
    throw new Error(`fast-array-onebuf: length ${length} is not a non-negative integer`);
  }
  if (start >= MAX_START) throw new Error(`fast-array-onebuf: start ${start} exceeds 24-bit budget`);
  if (length >= MAX_LENGTH) throw new Error(`fast-array-onebuf: length ${length} exceeds 16-bit budget`);
  // Multiplication instead of a shift: the packed value needs up to 40
  // bits, beyond the 32-bit range of JS bitwise operators.
  return start + length * POW_24;
}

// Low 24 bits. The 32-bit truncation done by `&` keeps those bits intact.
function _start(d) { return d & MASK_24; }

// Bits 24-39, extracted by division so the high bits are not truncated.
function _length(d) { return Math.floor(d / POW_24) & MASK_16; }
| 99 | + |
| 100 | +// ---- Singleton context --------------------------------------------- |
| 101 | + |
// The one context all views share; null until the first registration.
let _singletonContext = null;

// Remember `ctx` on first use. Registering the same context again is a
// no-op; registering a different one is an error.
function _registerContext(ctx) {
  if (_singletonContext === ctx) return;
  if (_singletonContext !== null) {
    throw new Error('fast-array-onebuf: expected a singleton PDFContext, got a second distinct one.');
  }
  _singletonContext = ctx;
}
| 111 | + |
| 112 | +// ---- Append + COW helpers ------------------------------------------ |
| 113 | + |
// Copy `lenSlots` entries of `temp`, starting at `fromOffset`, onto the
// tail of arrayMain and advance the high-water mark by the same amount.
function _appendFromTemp(temp, fromOffset, lenSlots) {
  const tail = arrayMainLen;
  let copied = 0;
  while (copied < lenSlots) {
    arrayMain[tail + copied] = temp[fromOffset + copied];
    copied += 1;
  }
  arrayMainLen = tail + lenSlots;
}
| 120 | + |
// Append every element of `arr` to arrayMain's tail and advance the
// high-water mark by arr.length.
function _appendArray(arr) {
  const count = arr.length;
  const tail = arrayMainLen;
  let i = 0;
  while (i < count) {
    arrayMain[tail + i] = arr[i];
    i += 1;
  }
  arrayMainLen = tail + count;
}
| 126 | + |
// COW helper: duplicate the range described by `pa.d` at arrayMain's
// tail and return the descriptor of the copy. A range that already ends
// at the high-water mark is left alone and its descriptor is returned
// unchanged. `pa.d` itself is never written here.
function _cow(pa) {
  const packed = pa.d;
  const from = _start(packed);
  const count = _length(packed);
  const tail = arrayMainLen;
  if (from + count === tail) {
    return packed; // already at HWM
  }
  for (let k = 0; k < count; k++) {
    arrayMain[tail + k] = arrayMain[from + k];
  }
  arrayMainLen = tail + count;
  return pack(tail, count);
}
| 139 | + |
| 140 | +// ---- Construction -------------------------------------------------- |
| 141 | +// |
| 142 | +// Use a plain-function constructor (`_FastArray`) with the prototype |
| 143 | +// aliased to PDFArray.prototype instead of `Object.create + writes`. |
| 144 | +// Same shape change fast-refs-class and fast-dict-onebuf made: V8 |
| 145 | +// gives `new`-built instances a stable hidden class from the first |
| 146 | +// instance and drops per-instance cost vs the slow-property path |
| 147 | +// taken by Object.create + later property writes. |
| 148 | +// |
| 149 | +// No subclass dispatch needed -- PDFArray has no subclasses in |
| 150 | +// pdf-lib (unlike PDFDict's PDFCatalog / PDFPageTree / PDFPageLeaf). |
| 151 | + |
// Constructor for range views: the only per-instance state is the
// packed (start, length) descriptor `d`.
function _FastArray(d) {
  this.d = d;
}

// Share PDFArray's prototype so instances built here are PDFArrays.
_FastArray.prototype = PDFArray.prototype;
| 154 | + |
// Build a view over arrayMain[start, start + length). The context is
// registered first, so a second distinct context fails before any view
// is created.
function _makeFromRange(start, length, ctx) {
  _registerContext(ctx);
  const packed = pack(start, length);
  return new _FastArray(packed);
}
| 159 | + |
// Commit the elements of `arr` to arrayMain's tail and return a view
// over exactly the slots just written.
function _makeFromAppend(arr, ctx) {
  const rangeStart = arrayMainLen; // captured before the append moves it
  _appendArray(arr);
  const rangeLength = arr.length;
  return _makeFromRange(rangeStart, rangeLength, ctx);
}
| 165 | + |
| 166 | +if (!PDFArray.prototype.__fastArrayOnebufInstalled) { |
| 167 | + |
| 168 | + // ---- PDFArray.prototype ----------------------------------------- |
| 169 | + |
| 170 | + PDFArray.prototype.size = function () { |
| 171 | + return _length(this.d); |
| 172 | + }; |
| 173 | + |
| 174 | + PDFArray.prototype.push = function (object) { |
| 175 | + const d0 = this.d; |
| 176 | + const start0 = _start(d0); |
| 177 | + const length0 = _length(d0); |
| 178 | + let dNow = d0; |
| 179 | + if (start0 + length0 !== arrayMainLen) { |
| 180 | + dNow = _cow(this); |
| 181 | + } |
| 182 | + arrayMain[arrayMainLen++] = object; |
| 183 | + const start = _start(dNow); |
| 184 | + this.d = pack(start, length0 + 1); |
| 185 | + }; |
| 186 | + |
| 187 | + PDFArray.prototype.get = function (index) { |
| 188 | + return arrayMain[_start(this.d) + index]; |
| 189 | + }; |
| 190 | + |
| 191 | + PDFArray.prototype.set = function (index, object) { |
| 192 | + arrayMain[_start(this.d) + index] = object; |
| 193 | + }; |
| 194 | + |
| 195 | + PDFArray.prototype.indexOf = function (object) { |
| 196 | + const d = this.d; |
| 197 | + const start = _start(d); |
| 198 | + const length = _length(d); |
| 199 | + for (let i = 0; i < length; i++) { |
| 200 | + if (arrayMain[start + i] === object) return i; |
| 201 | + } |
| 202 | + return undefined; |
| 203 | + }; |
| 204 | + |
| 205 | + PDFArray.prototype.insert = function (index, object) { |
| 206 | + // Always COW -- shifting elements in place would corrupt other |
| 207 | + // arrays' ranges past this one. |
| 208 | + const d0 = this.d; |
| 209 | + const start0 = _start(d0); |
| 210 | + const length0 = _length(d0); |
| 211 | + const newStart = arrayMainLen; |
| 212 | + for (let i = 0; i < index; i++) { |
| 213 | + arrayMain[arrayMainLen++] = arrayMain[start0 + i]; |
| 214 | + } |
| 215 | + arrayMain[arrayMainLen++] = object; |
| 216 | + for (let i = index; i < length0; i++) { |
| 217 | + arrayMain[arrayMainLen++] = arrayMain[start0 + i]; |
| 218 | + } |
| 219 | + this.d = pack(newStart, length0 + 1); |
| 220 | + }; |
| 221 | + |
| 222 | + PDFArray.prototype.remove = function (index) { |
| 223 | + // Always COW (same reason as insert). |
| 224 | + const d0 = this.d; |
| 225 | + const start0 = _start(d0); |
| 226 | + const length0 = _length(d0); |
| 227 | + const newStart = arrayMainLen; |
| 228 | + for (let i = 0; i < length0; i++) { |
| 229 | + if (i === index) continue; |
| 230 | + arrayMain[arrayMainLen++] = arrayMain[start0 + i]; |
| 231 | + } |
| 232 | + this.d = pack(newStart, length0 - 1); |
| 233 | + }; |
| 234 | + |
| 235 | + PDFArray.prototype.asArray = function () { |
| 236 | + const d = this.d; |
| 237 | + const start = _start(d); |
| 238 | + const length = _length(d); |
| 239 | + const out = new Array(length); |
| 240 | + for (let i = 0; i < length; i++) out[i] = arrayMain[start + i]; |
| 241 | + return out; |
| 242 | + }; |
| 243 | + |
| 244 | + PDFArray.prototype.clone = function (context) { |
| 245 | + const d = this.d; |
| 246 | + const start = _start(d); |
| 247 | + const length = _length(d); |
| 248 | + const newStart = arrayMainLen; |
| 249 | + for (let i = 0; i < length; i++) arrayMain[arrayMainLen + i] = arrayMain[start + i]; |
| 250 | + arrayMainLen += length; |
| 251 | + _registerContext(context || _singletonContext); |
| 252 | + return new _FastArray(pack(newStart, length)); |
| 253 | + }; |
| 254 | + |
| 255 | + PDFArray.prototype.toString = function () { |
| 256 | + const d = this.d; |
| 257 | + const start = _start(d); |
| 258 | + const length = _length(d); |
| 259 | + let s = '[ '; |
| 260 | + for (let i = 0; i < length; i++) s += arrayMain[start + i].toString() + ' '; |
| 261 | + return s + ']'; |
| 262 | + }; |
| 263 | + |
| 264 | + PDFArray.prototype.sizeInBytes = function () { |
| 265 | + const d = this.d; |
| 266 | + const start = _start(d); |
| 267 | + const end = start + _length(d); |
| 268 | + let size = 3; |
| 269 | + for (let i = start; i < end; i++) size += arrayMain[i].sizeInBytes() + 1; |
| 270 | + return size; |
| 271 | + }; |
| 272 | + |
| 273 | + PDFArray.prototype.copyBytesInto = function (buffer, offset) { |
| 274 | + const initialOffset = offset; |
| 275 | + buffer[offset++] = CharCodes.LeftSquareBracket; |
| 276 | + buffer[offset++] = CharCodes.Space; |
| 277 | + const d = this.d; |
| 278 | + const start = _start(d); |
| 279 | + const end = start + _length(d); |
| 280 | + for (let i = start; i < end; i++) { |
| 281 | + offset += arrayMain[i].copyBytesInto(buffer, offset); |
| 282 | + buffer[offset++] = CharCodes.Space; |
| 283 | + } |
| 284 | + buffer[offset++] = CharCodes.RightSquareBracket; |
| 285 | + return offset - initialOffset; |
| 286 | + }; |
| 287 | + |
| 288 | + // lookup, lookupMaybe, asRectangle, scalePDFNumbers stay on the |
| 289 | + // upstream prototype -- they call this.get / this.size / this.set |
| 290 | + // and dispatch through our overrides. |
| 291 | + |
| 292 | + Object.defineProperty(PDFArray.prototype, 'context', { |
| 293 | + get() { return _singletonContext; }, |
| 294 | + set(_ctx) { /* singleton is source of truth */ }, |
| 295 | + configurable: true, |
| 296 | + }); |
| 297 | + |
| 298 | + // ---- PDFArray factory ------------------------------------------- |
| 299 | + |
| 300 | + PDFArray.withContext = function (context) { |
| 301 | + return _makeFromAppend([], context); |
| 302 | + }; |
| 303 | + |
| 304 | + // ---- PDFObjectParser.prototype.parseArray ----------------------- |
| 305 | + // |
| 306 | + // Same temp/commit pattern as fast-dict-onebuf's parseDict: |
| 307 | + // each parser instance carries its own _arrayTemp + length cursor; |
| 308 | + // parseArray pushes elements onto temp's tail, commits the frame |
| 309 | + // to arrayMain in one contiguous append, pops temp back to |
| 310 | + // frameStart, returns a PDFArray view into arrayMain. |
| 311 | + |
| 312 | + PDFObjectParser.prototype.parseArray = function fastParseArrayOneBuf() { |
| 313 | + const bytes = this.bytes; |
| 314 | + bytes.assertNext(CharCodes.LeftSquareBracket); |
| 315 | + this.skipWhitespaceAndComments(); |
| 316 | + |
| 317 | + if (this._arrayTemp === undefined) { |
| 318 | + this._arrayTemp = new Array(64); // grows naturally if needed |
| 319 | + this._arrayTempLen = 0; |
| 320 | + } |
| 321 | + const temp = this._arrayTemp; |
| 322 | + const frameStart = this._arrayTempLen; |
| 323 | + |
| 324 | + while (bytes.peek() !== CharCodes.RightSquareBracket) { |
| 325 | + const element = this.parseObject(); // may recurse |
| 326 | + temp[this._arrayTempLen++] = element; |
| 327 | + this.skipWhitespaceAndComments(); |
| 328 | + } |
| 329 | + bytes.assertNext(CharCodes.RightSquareBracket); |
| 330 | + |
| 331 | + const frameLen = this._arrayTempLen - frameStart; |
| 332 | + const start = arrayMainLen; |
| 333 | + _appendFromTemp(temp, frameStart, frameLen); |
| 334 | + this._arrayTempLen = frameStart; |
| 335 | + |
| 336 | + return _makeFromRange(start, frameLen, this.context); |
| 337 | + }; |
| 338 | + |
| 339 | + PDFArray.prototype.__fastArrayOnebufInstalled = true; |
| 340 | +} |
0 commit comments