|
| 1 | +/** |
| 2 | + * Emoji render gate — proves emoji code points render as real color glyphs in |
| 3 | + * the output PDF instead of .notdef tofu boxes (▯). This is the regression gate |
| 4 | + * for fix/make-pdf-emoji-tofu. |
| 5 | + * |
| 6 | + * Why not just check pdftotext? Because text extraction is a FALSE oracle for |
| 7 | + * emoji: Skia preserves the Unicode in the text cluster even when the displayed |
| 8 | + * glyph is .notdef, so pdftotext can report the emoji survived on a render that |
| 9 | + * actually drew tofu. Verified empirically on macOS — pdftotext extracts 😀 |
| 10 | + * regardless of whether a color font was available. |
| 11 | + * |
| 12 | + * Two assertions that DO distinguish a real render from tofu: |
| 13 | + * 1. pdffonts shows an emoji family embedded in the PDF (the cascade selected |
| 14 | + * a real emoji font — AppleColorEmoji as Type 3 on macOS, NotoColorEmoji |
| 15 | + * on Linux). Missing-fallback => no emoji font embedded. |
| 16 | + * 2. pdftoppm rasterizes the page and we count saturated (colored) pixels. |
| 17 | + * A color-emoji render has hundreds (measured: ~1650 at 100dpi); a tofu |
| 18 | + * render is a monochrome black outline on white (~0 saturated). Tolerant |
| 19 | + * threshold, not an exact-pixel fixture diff, to dodge cross-platform AA |
| 20 | + * and font-version variance. |
| 21 | + * |
| 22 | + * Note: pdfimages -list is intentionally NOT used — macOS embeds color emoji as |
| 23 | + * Type 3 fonts, so pdfimages lists nothing even on a correct render. |
| 24 | + * |
| 25 | + * Gating: runs only when the compiled binary + browse + pdffonts + pdftoppm are |
| 26 | + * available AND a color-emoji font is installed for Chromium to fall back to. |
| 27 | + * In CI (process.env.CI set) missing prerequisites are a HARD FAILURE, not a |
| 28 | + * skip — CI is expected to install poppler-utils + fonts-noto-color-emoji, so a |
| 29 | + * silent skip there would let the tofu regression ship behind a green build. |
| 30 | + * Local dev without those tools skips cleanly. |
| 31 | + */ |
| 32 | + |
| 33 | +import { describe, expect, test } from "bun:test"; |
| 34 | +import { execFileSync } from "node:child_process"; |
| 35 | +import * as fs from "node:fs"; |
| 36 | +import * as path from "node:path"; |
| 37 | + |
| 38 | +import { resolvePopplerTool } from "../../src/pdftotext"; |
| 39 | + |
// Markdown fixture handed to the make-pdf binary.
const FIXTURE = path.resolve(__dirname, "../fixtures/emoji-gate.md");
// Directory three levels above this file; both binaries are located beneath it.
const ROOT = path.resolve(__dirname, "../../..");
// Joins a root-relative path onto ROOT.
const underRoot = (relativePath: string): string => path.join(ROOT, relativePath);
const PDF_BIN = underRoot("make-pdf/dist/pdf");
const BROWSE_BIN = underRoot("browse/dist/browse");
| 44 | + |
// Minimum number of colored pixels the rasterized page must contain. The
// fixture's color emoji measured ~1650 at 100dpi and a tofu render ~0, so 200
// leaves a wide margin on both sides.
const SATURATED_PIXEL_FLOOR = 200;

// Channel spread (max - min across R, G, B) a pixel must exceed to count as
// colored. Black text, gray rules, and the white background stay near 0, while
// color emoji produce a large spread.
const SATURATION_DELTA = 40;

// Wall-clock ceiling, in milliseconds, applied to every child process. Bun's
// test timeout doesn't reliably interrupt a synchronous execFileSync, so a
// wedged browser/poppler binary (or a hostile GSTACK_*_BIN override) must be
// bounded per child to fail instead of hanging the whole job.
const CHILD_TIMEOUT_MS = 25 * 1000;
| 56 | + |
/**
 * Reports whether a color-emoji font is available for Chromium to fall back to.
 *
 * - macOS: checks that the system Apple Color Emoji font file exists.
 * - Linux: runs `fc-match` with a format that prints the matched font's
 *   `color` property and looks for "true" in the output. A missing `fc-match`
 *   or a failed/timed-out invocation counts as "not available".
 * - Any other platform: always `false`.
 *
 * @returns `true` when a color-emoji font was detected, otherwise `false`.
 */
function emojiFontAvailable(): boolean {
  switch (process.platform) {
    case "darwin":
      return fs.existsSync("/System/Library/Fonts/Apple Color Emoji.ttc");

    case "linux": {
      const fcMatchPath = Bun.which("fc-match");
      if (!fcMatchPath) return false;

      let colorReport: string;
      try {
        colorReport = execFileSync(
          fcMatchPath,
          ["-f", "%{color}\n", ":lang=und-zsye:charset=1F600"],
          { encoding: "utf8", timeout: CHILD_TIMEOUT_MS },
        );
      } catch {
        // Deliberate best-effort probe: any failure means "no usable font".
        return false;
      }
      return /true/i.test(colorReport);
    }

    default:
      return false;
  }
}
| 78 | + |
/**
 * Checks, in order, everything the emoji gate needs: the make-pdf binary, the
 * browse binary, the markdown fixture, the pdffonts and pdftoppm tools, and a
 * color-emoji font. Evaluation stops at the first missing prerequisite.
 *
 * @returns `{ ok: true }` when everything is present, otherwise
 *          `{ ok: false, reason }` describing the first missing prerequisite.
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  // Each entry pairs a lazily evaluated check with the message reported when it fails.
  const requirements: ReadonlyArray<{ satisfied: () => boolean; reason: string }> = [
    {
      satisfied: () => fs.existsSync(PDF_BIN),
      reason: `make-pdf binary missing (${PDF_BIN}). Run bun run build.`,
    },
    {
      satisfied: () => fs.existsSync(BROWSE_BIN),
      reason: `browse binary missing (${BROWSE_BIN}).`,
    },
    {
      satisfied: () => fs.existsSync(FIXTURE),
      reason: `fixture missing (${FIXTURE}).`,
    },
    {
      satisfied: () => Boolean(resolvePopplerTool("pdffonts")),
      reason: "pdffonts not found (install poppler-utils).",
    },
    {
      satisfied: () => Boolean(resolvePopplerTool("pdftoppm")),
      reason: "pdftoppm not found (install poppler-utils).",
    },
    {
      satisfied: () => emojiFontAvailable(),
      reason: "no color-emoji font installed; run ./setup (Linux) or install one.",
    },
  ];

  // `find` stops at the first failing requirement, so later checks never run.
  const firstMissing = requirements.find((requirement) => !requirement.satisfied());
  return firstMissing ? { ok: false, reason: firstMissing.reason } : { ok: true };
}
| 88 | + |
/**
 * Count the pixels of a binary (P6) PPM whose RGB channel spread
 * (max channel minus min channel) is strictly greater than `delta`.
 *
 * The header is validated so that malformed or variant output is a thrown
 * diagnostic, never a silently wrong count. Width, height and maxval must be
 * plain decimal digits: `Number()` alone would also accept forms such as
 * "0x2", "2e0" or "2.0", which are not valid PPM header fields.
 *
 * @param ppmPath Path of the PPM file to read.
 * @param delta   Channel spread a pixel must exceed to be counted.
 * @returns The number of pixels whose channel spread exceeds `delta`.
 * @throws Error when the magic is not "P6", a header field is not a decimal
 *         integer, a dimension is not positive, maxval is not 255, or the
 *         pixel data is shorter than width * height * 3 bytes.
 */
function countSaturatedPixels(ppmPath: string, delta: number): number {
  const b = fs.readFileSync(ppmPath);
  let i = 0;

  // Space, LF, TAB and CR separate header tokens.
  const isWhitespace = (byte: number | undefined): boolean =>
    byte === 0x20 || byte === 0x0a || byte === 0x09 || byte === 0x0d;

  const skipWhitespaceAndComments = (): void => {
    for (;;) {
      while (i < b.length && isWhitespace(b[i])) i++;
      if (b[i] === 0x23) { // '#': comment runs to end of line
        while (i < b.length && b[i] !== 0x0a) i++;
        continue;
      }
      break;
    }
  };

  // Next whitespace-delimited header token ("" once the buffer is exhausted).
  const token = (): string => {
    skipWhitespaceAndComments();
    const start = i;
    while (i < b.length && !isWhitespace(b[i])) i++;
    return b.slice(start, i).toString("ascii");
  };

  // Header integers must be decimal digits only; anything else is malformed.
  const headerInt = (field: string): number => {
    const raw = token();
    if (!/^[0-9]+$/.test(raw)) {
      throw new Error(`invalid PPM ${field}: "${raw}"`);
    }
    return Number(raw);
  };

  const magic = token();
  if (magic !== "P6") throw new Error(`expected P6 PPM, got "${magic}"`);

  const w = headerInt("width");
  const h = headerInt("height");
  const maxval = headerInt("maxval");
  if (!Number.isSafeInteger(w) || w <= 0 || !Number.isSafeInteger(h) || h <= 0) {
    throw new Error(`invalid PPM dimensions: ${w}x${h}`);
  }
  if (maxval !== 255) {
    // pdftoppm emits 8-bit P6 (maxval 255). 16-bit would be 2 bytes/channel and
    // would break the byte math below — fail loudly rather than miscount.
    throw new Error(`unexpected PPM maxval ${maxval} (expected 255)`);
  }

  i++; // single whitespace byte after maxval precedes the pixel block
  const total = w * h;
  if (b.length - i < total * 3) {
    throw new Error(`PPM pixel buffer too short: have ${b.length - i}, need ${total * 3}`);
  }

  let sat = 0;
  for (let p = 0; p < total; p++) {
    const o = i + p * 3;
    const r = b[o], g = b[o + 1], bl = b[o + 2];
    if (Math.max(r, g, bl) - Math.min(r, g, bl) > delta) sat++;
  }
  return sat;
}
| 139 | + |
describe("emoji render gate", () => {
  // Prerequisites are probed once, when the suite is registered.
  const prereq = prerequisitesAvailable();

  test.skipIf(!prereq.ok)("emoji render as color glyphs, not tofu", () => {
    if (!prereq.ok) return; // type narrowing

    // A private mkdtemp directory under /tmp: per the original note, browse's
    // validateOutputPath only allows /tmp and /private/tmp (not os.tmpdir()'s
    // /var/folders), and a random suffix avoids the predictable-path
    // symlink/collision risk.
    const scratchDir = fs.mkdtempSync("/tmp/make-pdf-emoji-gate-");
    const pdfPath = path.join(scratchDir, "out.pdf");
    const rasterPrefix = path.join(scratchDir, "page");
    const rasterPath = `${rasterPrefix}.ppm`;

    try {
      // Render the fixture to PDF with the compiled binary.
      execFileSync(PDF_BIN, ["generate", FIXTURE, pdfPath, "--quiet"], {
        encoding: "utf8",
        env: { ...process.env, BROWSE_BIN },
        stdio: ["ignore", "pipe", "pipe"],
        timeout: CHILD_TIMEOUT_MS,
      });
      expect(fs.existsSync(pdfPath)).toBe(true);

      // Assertion 1: pdffonts must list an emoji family, i.e. the font cascade
      // selected a real emoji font rather than falling through to .notdef.
      const pdffontsBin = resolvePopplerTool("pdffonts")!;
      const embeddedFonts = execFileSync(pdffontsBin, [pdfPath], {
        encoding: "utf8",
        timeout: CHILD_TIMEOUT_MS,
      });
      const emojiFontEmbedded = /emoji/i.test(embeddedFonts);
      if (!emojiFontEmbedded) {
        // Dump the font table so a failure is diagnosable from the log.
        process.stderr.write(`\n--- pdffonts ---\n${embeddedFonts}\n--- END ---\n`);
      }
      expect(emojiFontEmbedded).toBe(true);

      // Assertion 2: the rasterized page must contain colored pixels, not
      // just a monochrome tofu box.
      const pdftoppmBin = resolvePopplerTool("pdftoppm")!;
      execFileSync(pdftoppmBin, ["-r", "100", "-singlefile", pdfPath, rasterPrefix], {
        stdio: ["ignore", "pipe", "pipe"],
        timeout: CHILD_TIMEOUT_MS,
      });
      expect(fs.existsSync(rasterPath)).toBe(true);

      const coloredPixels = countSaturatedPixels(rasterPath, SATURATION_DELTA);
      if (coloredPixels < SATURATED_PIXEL_FLOOR) {
        process.stderr.write(`\n[emoji-gate] saturated pixels: ${coloredPixels} (floor ${SATURATED_PIXEL_FLOOR})\n`);
      }
      expect(coloredPixels).toBeGreaterThanOrEqual(SATURATED_PIXEL_FLOOR);
    } finally {
      // Best-effort cleanup; a failure here must not mask the test result.
      try {
        fs.rmSync(scratchDir, { recursive: true, force: true });
      } catch {
        /* ignore */
      }
    }
  }, 60000);

  if (!prereq.ok) {
    const { reason } = prereq;
    // In CI a missing prerequisite is a hard failure, because a silent skip
    // would let the Linux tofu regression ship behind a green build. Locally
    // it only produces a warning.
    test("emoji gate prerequisites are present (hard-required in CI)", () => {
      if (process.env.CI) {
        throw new Error(`emoji gate prerequisites missing in CI: ${reason}`);
      }
      console.warn(`[skip] ${reason}`);
    });
  }
});
0 commit comments