|
| 1 | +// Adapted verbatim from pagedjs-cli 0.4.3 src/outline.js |
| 2 | +// (https://github.com/pagedjs/pagedjs-cli) -- MIT, Copyright (c) 2018 |
| 3 | +// Adam Hyde. Pulled in directly so we no longer need the pagedjs-cli |
| 4 | +// dependency. |
| 5 | +// |
| 6 | +// Two exports: |
| 7 | +// parseOutline(page, tags, enableWarnings) -- runs in the browser |
| 8 | +// via page.evaluate. Walks document.querySelectorAll(tags.join(',')) |
| 9 | +// to produce a nested outline tree of {title, destination, children}. |
| 10 | +// Also creates a hidden <a href="#id"> link-holder so Chrome |
| 11 | +// registers a named destination for each heading -- without that, |
| 12 | +// the named-destination Dest entries we write in setOutline would |
| 13 | +// point nowhere. |
| 14 | +// |
| 15 | +// setOutline(pdfDoc, outline, enableWarnings) -- runs in Node on the |
| 16 | +// parsed pdf-lib document. Walks the outline tree and writes a |
| 17 | +// /Outlines tree of PDF dicts using pdf-lib's low-level API |
| 18 | +// (PDFDict.fromMapWithContext, etc.). Each entry's Dest is a name |
| 19 | +// that Chrome's /Dests catalog entry resolves to a page+coords. |
| 20 | + |
| 21 | +import { PDFDict, PDFName, PDFNumber, PDFHexString } from "pdf-lib"; |
| 22 | +import { decode as htmlEntitiesDecode } from "html-entities"; |
| 23 | + |
// Matches one tag-like span: "<", one or more characters other than ">", then ">".
const SanitizeXMLRx = /<[^>]+>/g;

/**
 * Turns an outline title into plain text: tag-like markup is removed first,
 * then HTML entities are decoded.
 *
 * @param {string} string - Raw title text.
 * @returns {string} The value returned by htmlEntitiesDecode for the
 *   tag-stripped text.
 */
function sanitize (string) {
  // Only run the regex when a "<" is present at all.
  const withoutTags = string.includes("<")
    ? string.replace(SanitizeXMLRx, "")
    : string;
  return htmlEntitiesDecode(withoutTags);
}
| 32 | + |
/**
 * Builds a nested outline from the headings of the rendered page.
 *
 * The callback below is handed to page.evaluate, so it runs inside the page
 * and must stay self-contained: it may only use its own argument and page
 * globals such as `document`.
 *
 * For every heading matched by `tags.join(",")` it:
 *   - appends one hidden `<a href="#...">` to a `display: none` holder that
 *     is inserted as the first child of `<body>` (registers a destination);
 *   - creates a node `{title, destination, children, depth}` and nests it
 *     according to the index of the heading's tag name within `tags`.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {string[]} tags - Lower-case tag names, outermost level first.
 * @param {boolean} enableWarnings - Accepted for interface compatibility;
 *   not used by this function.
 * @returns {Promise<object[]>} The top-level outline nodes.
 */
export async function parseOutline(page, tags, enableWarnings) {
  return await page.evaluate((tags) => {
    // Reversed so that pop() hands the headings back in document order.
    const tagsToProcess = [...document.querySelectorAll(tags.join(","))].reverse();

    const root = {children: [], depth: -1};
    let currentOutlineNode = root;

    const linkHolder = document.createElement("div");
    const body = document.querySelector("body");
    linkHolder.style.display = "none";
    body.insertBefore(linkHolder, body.firstChild);

    while (tagsToProcess.length > 0) {
      const tag = tagsToProcess.pop();
      const orderDepth = tags.indexOf(tag.tagName.toLowerCase());

      if (orderDepth < currentOutlineNode.depth) {
        // Heading is shallower than the current node: climb one level and
        // retry the same heading. Nothing is created on this pass, so the
        // hidden link below is emitted exactly once per heading.
        currentOutlineNode = currentOutlineNode.parent;
        tagsToProcess.push(tag);
        continue;
      }

      // encode section ID until https://bugs.chromium.org/p/chromium/issues/detail?id=985254 is fixed
      const dest = encodeURIComponent(tag.id).replace(/%/g, "#25");

      // Add to link holder to register a destination
      const hiddenLink = document.createElement("a");
      hiddenLink.href = "#" + dest;
      linkHolder.appendChild(hiddenLink);

      const newNode = {
        title: tag.innerText.trim(),
        destination: dest,
        children: [],
        depth: orderDepth,
      };

      // Same depth as the current node -> sibling (attach to its parent when
      // it has one); deeper -> child of the current node.
      const isSibling = orderDepth === currentOutlineNode.depth;
      const parentNode = isSibling && currentOutlineNode.parent
        ? currentOutlineNode.parent
        : currentOutlineNode;
      newNode.parent = parentNode;
      parentNode.children.push(newNode);
      currentOutlineNode = newNode;
    }

    // Parent links make the tree cyclic; clear them before returning it.
    const stripParentProperty = (node) => {
      node.parent = undefined;
      for (const child of node.children) {
        stripParentProperty(child);
      }
    };
    stripParentProperty(root);
    return root.children;
  }, tags);
}
| 97 | + |
/**
 * Walks the outline depth-first and stamps every item with a fresh `ref`
 * taken from `context.nextRef()` plus the `parentRef` of the level above.
 * An item's ref is allocated before those of its children.
 *
 * @param {object[]} layer - Items of one outline level; mutated in place.
 * @param {object} context - Object exposing `nextRef()`.
 * @param {*} parentRef - Reference stored as `parentRef` on each item.
 */
function setRefsForOutlineItems (layer, context, parentRef) {
  layer.forEach((entry) => {
    entry.ref = context.nextRef();
    entry.parentRef = parentRef;
    setRefsForOutlineItems(entry.children, context, entry.ref);
  });
}
| 105 | + |
/**
 * Counts every item in `layer` together with all of its descendants.
 *
 * @param {object[]} layer - Items of one outline level, each with `children`.
 * @returns {number} Total number of items at this level and below.
 */
function countChildrenOfOutline (layer) {
  return layer.reduce(
    (total, entry) => total + 1 + countChildrenOfOutline(entry.children),
    0,
  );
}
| 114 | + |
/**
 * Creates one PDF dictionary per outline item and registers it in `context`
 * under the item's `ref`, then does the same for the item's children.
 *
 * Each dictionary always carries Title, Dest and Parent. Prev/Next are added
 * when a neighbour exists at the same level; First/Last/Count are added when
 * the item has children.
 *
 * @param {object[]} layer - Items of one outline level; each must already
 *   have `ref` and `parentRef` set.
 * @param {object} context - Passed to PDFDict.fromMapWithContext and used
 *   for `assign(ref, dict)`.
 */
function buildPdfObjectsForOutline (layer, context) {
  layer.forEach((entry, index) => {
    const previous = layer[index - 1];
    const following = layer[index + 1];
    const kids = entry.children;

    const fields = new Map();
    fields.set(PDFName.of("Title"), PDFHexString.fromText(sanitize(entry.title)));
    fields.set(PDFName.of("Dest"), PDFName.of(entry.destination));
    fields.set(PDFName.of("Parent"), entry.parentRef);

    if (previous) {
      fields.set(PDFName.of("Prev"), previous.ref);
    }
    if (following) {
      fields.set(PDFName.of("Next"), following.ref);
    }
    if (kids.length > 0) {
      fields.set(PDFName.of("First"), kids[0].ref);
      fields.set(PDFName.of("Last"), kids[kids.length - 1].ref);
      fields.set(PDFName.of("Count"), PDFNumber.of(countChildrenOfOutline(kids)));
    }

    context.assign(entry.ref, PDFDict.fromMapWithContext(fields, context));

    buildPdfObjectsForOutline(kids, context);
  });
}
| 142 | + |
/**
 * Logs a console warning for every outline item (at any depth) whose
 * destination has no matching key in the document catalog's Dests entry.
 *
 * The Dests entry is resolved once and its keys are collected into a Set, so
 * nested outline levels reuse the same lookup instead of repeating it.
 *
 * @param {object[]} layer - Outline items, each with `destination` and
 *   `children`.
 * @param {object} pdfDoc - Document exposing `context.lookup` and
 *   `catalog.get`.
 */
function generateWarningsAboutMissingDestinations (layer, pdfDoc) {
  const dests = pdfDoc.context.lookup(pdfDoc.catalog.get(PDFName.of("Dests")));
  // Dests can be undefined if the PDF wasn't successfully generated (for instance if Paged.js threw an exception)
  if (!dests) {
    return;
  }

  // Keys are compared in the form returned by key.value(); the check below
  // prepends "/" to the item's destination to match that form.
  const validDestinationTargets = new Set(
    dests.entries().map(([key]) => key.value()),
  );

  const warnForLayer = (items) => {
    for (const item of items) {
      if (item.destination && !validDestinationTargets.has(`/${item.destination}`)) {
        console.warn(`Unable to find destination "${item.destination}" while generating PDF outline.`);
      }
      warnForLayer(item.children);
    }
  };
  warnForLayer(layer);
}
| 156 | + |
/**
 * Writes the given outline tree into the document and points the catalog's
 * Outlines entry at it.
 *
 * An empty outline leaves the document untouched; in particular no object
 * reference is allocated for it.
 *
 * @param {object} pdfDoc - Document exposing `context` and `catalog`.
 * @param {object[]} outline - Top-level outline items, each with `title`,
 *   `destination` and `children`. Items are mutated: `ref` and `parentRef`
 *   are set on every node.
 * @param {boolean} [enableWarnings=false] - When true, warns about outline
 *   destinations that the document does not define.
 * @returns {Promise<object>} The same `pdfDoc`.
 */
export async function setOutline (pdfDoc, outline, enableWarnings=false) {
  if (outline.length === 0) {
    return pdfDoc;
  }

  const context = pdfDoc.context;
  // Allocated only once we know an outline will be written, and before the
  // items get their own refs.
  const outlineRef = context.nextRef();

  if (enableWarnings) {
    generateWarningsAboutMissingDestinations(outline, pdfDoc);
  }

  setRefsForOutlineItems(outline, context, outlineRef);
  buildPdfObjectsForOutline(outline, context);

  const outlineObject = PDFDict.fromMapWithContext(new Map([
    [PDFName.of("First"), outline[0].ref],
    [PDFName.of("Last"), outline[outline.length - 1].ref],
    [PDFName.of("Count"), PDFNumber.of(countChildrenOfOutline(outline))]
  ]), context);
  context.assign(outlineRef, outlineObject);

  pdfDoc.catalog.set(PDFName.of("Outlines"), outlineRef);
  return pdfDoc;
}
0 commit comments