|
| 1 | +package com.applitools.imagetester.lib.converters; |
| 2 | + |
| 3 | +import org.apache.pdfbox.pdmodel.PDDocument; |
| 4 | +import org.apache.pdfbox.pdmodel.PDPage; |
| 5 | +import org.apache.pdfbox.pdmodel.PDPageContentStream; |
| 6 | +import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; |
| 7 | +import org.apache.pdfbox.pdmodel.common.PDRectangle; |
| 8 | +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; |
| 9 | +import org.w3c.dom.Document; |
| 10 | +import org.w3c.dom.Element; |
| 11 | +import org.w3c.dom.Node; |
| 12 | +import org.w3c.dom.NodeList; |
| 13 | + |
| 14 | +import javax.xml.parsers.DocumentBuilder; |
| 15 | +import javax.xml.parsers.DocumentBuilderFactory; |
| 16 | +import java.io.ByteArrayInputStream; |
| 17 | +import java.io.File; |
| 18 | +import java.io.IOException; |
| 19 | +import java.io.InputStream; |
| 20 | +import java.nio.file.Path; |
| 21 | +import java.util.ArrayList; |
| 22 | +import java.util.Enumeration; |
| 23 | +import java.util.List; |
| 24 | +import java.util.Locale; |
| 25 | +import java.util.Optional; |
| 26 | +import java.util.regex.Matcher; |
| 27 | +import java.util.regex.Pattern; |
| 28 | +import java.util.zip.ZipEntry; |
| 29 | +import java.util.zip.ZipFile; |
| 30 | + |
| 31 | +/** |
| 32 | + * Re-stamps Excel header/footer pictures onto a LibreOffice-produced PDF. |
| 33 | + * |
| 34 | + * LibreOffice's headless xlsx->pdf conversion silently drops VML header/footer |
| 35 | + * graphics (the legacy mechanism Excel uses for "watermarks"). We pull the |
| 36 | + * picture directly out of the .xlsx package and draw it on every page so the |
| 37 | + * downstream visual comparison sees what Excel would actually print. |
| 38 | + */ |
| 39 | +public class XlsxWatermarkStamper { |
| 40 | + |
| 41 | + private static final String NS_RELATIONSHIPS = |
| 42 | + "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; |
| 43 | + private static final String NS_PACKAGE_RELS = |
| 44 | + "http://schemas.openxmlformats.org/package/2006/relationships"; |
| 45 | + private static final String NS_SHEETML = |
| 46 | + "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; |
| 47 | + private static final String NS_VML = "urn:schemas-microsoft-com:vml"; |
| 48 | + private static final String NS_VML_OFFICE = "urn:schemas-microsoft-com:office:office"; |
| 49 | + |
| 50 | + private static final String GRAPHIC_TOKEN = "&G"; |
| 51 | + private static final String SHEETS_PREFIX = "xl/worksheets/"; |
| 52 | + private static final String SHEET_SUFFIX = ".xml"; |
| 53 | + private static final float SCALE_TO_FIT_RATIO = 0.75f; |
| 54 | + |
| 55 | + private static final Pattern VML_STYLE_DIMENSION = |
| 56 | + Pattern.compile("(width|height)\\s*:\\s*([0-9.]+)\\s*pt", Pattern.CASE_INSENSITIVE); |
| 57 | + |
| 58 | + public File stampIfPresent(File xlsx, File pdfIn, Path tempDir) throws IOException { |
| 59 | + Optional<Watermark> watermark = extractWatermark(xlsx); |
| 60 | + if (!watermark.isPresent()) return pdfIn; |
| 61 | + |
| 62 | + File pdfOut = tempDir.resolve(stampedFilename(pdfIn)).toFile(); |
| 63 | + stamp(pdfIn, watermark.get(), pdfOut); |
| 64 | + return pdfOut; |
| 65 | + } |
| 66 | + |
| 67 | + Optional<Watermark> extractWatermark(File xlsx) throws IOException { |
| 68 | + try (ZipFile zip = new ZipFile(xlsx)) { |
| 69 | + for (String sheetPath : listSheets(zip)) { |
| 70 | + Optional<Watermark> found = resolveForSheet(zip, sheetPath); |
| 71 | + if (found.isPresent()) return found; |
| 72 | + } |
| 73 | + } |
| 74 | + return Optional.empty(); |
| 75 | + } |
| 76 | + |
| 77 | + private Optional<Watermark> resolveForSheet(ZipFile zip, String sheetPath) throws IOException { |
| 78 | + Document sheet = parseEntry(zip, sheetPath); |
| 79 | + if (sheet == null) return Optional.empty(); |
| 80 | + |
| 81 | + Element headerFooter = firstChild(sheet.getDocumentElement(), NS_SHEETML, "headerFooter"); |
| 82 | + Element legacyHf = firstChild(sheet.getDocumentElement(), NS_SHEETML, "legacyDrawingHF"); |
| 83 | + if (headerFooter == null || legacyHf == null || !hasGraphicToken(headerFooter)) { |
| 84 | + return Optional.empty(); |
| 85 | + } |
| 86 | + |
| 87 | + String legacyRid = legacyHf.getAttributeNS(NS_RELATIONSHIPS, "id"); |
| 88 | + if (legacyRid.isEmpty()) return Optional.empty(); |
| 89 | + |
| 90 | + String sheetDir = posixDir(sheetPath); |
| 91 | + String sheetRels = sheetDir + "/_rels/" + filename(sheetPath) + ".rels"; |
| 92 | + String vmlPath = resolveRelationship(zip, sheetRels, legacyRid, sheetDir); |
| 93 | + if (vmlPath == null) return Optional.empty(); |
| 94 | + |
| 95 | + VmlReference ref = parseVml(zip, vmlPath); |
| 96 | + if (ref == null) return Optional.empty(); |
| 97 | + |
| 98 | + String vmlDir = posixDir(vmlPath); |
| 99 | + String vmlRels = vmlDir + "/_rels/" + filename(vmlPath) + ".rels"; |
| 100 | + String imagePath = resolveRelationship(zip, vmlRels, ref.relid, vmlDir); |
| 101 | + if (imagePath == null) return Optional.empty(); |
| 102 | + |
| 103 | + byte[] imageBytes = readEntry(zip, imagePath); |
| 104 | + if (imageBytes == null) return Optional.empty(); |
| 105 | + |
| 106 | + return Optional.of(new Watermark(imageBytes, ref.widthPt, ref.heightPt)); |
| 107 | + } |
| 108 | + |
| 109 | + private static List<String> listSheets(ZipFile zip) { |
| 110 | + List<String> out = new ArrayList<String>(); |
| 111 | + Enumeration<? extends ZipEntry> entries = zip.entries(); |
| 112 | + while (entries.hasMoreElements()) { |
| 113 | + String name = entries.nextElement().getName(); |
| 114 | + if (name.startsWith(SHEETS_PREFIX) && name.endsWith(SHEET_SUFFIX) |
| 115 | + && !name.contains("/_rels/")) { |
| 116 | + out.add(name); |
| 117 | + } |
| 118 | + } |
| 119 | + return out; |
| 120 | + } |
| 121 | + |
| 122 | + private static boolean hasGraphicToken(Element headerFooter) { |
| 123 | + NodeList children = headerFooter.getChildNodes(); |
| 124 | + for (int i = 0; i < children.getLength(); i++) { |
| 125 | + Node child = children.item(i); |
| 126 | + if (child.getNodeType() == Node.ELEMENT_NODE |
| 127 | + && child.getTextContent() != null |
| 128 | + && child.getTextContent().contains(GRAPHIC_TOKEN)) { |
| 129 | + return true; |
| 130 | + } |
| 131 | + } |
| 132 | + return false; |
| 133 | + } |
| 134 | + |
| 135 | + private static String resolveRelationship(ZipFile zip, String relsPath, String rid, String baseDir) |
| 136 | + throws IOException { |
| 137 | + Document rels = parseEntry(zip, relsPath); |
| 138 | + if (rels == null) return null; |
| 139 | + |
| 140 | + NodeList rs = rels.getDocumentElement().getElementsByTagNameNS(NS_PACKAGE_RELS, "Relationship"); |
| 141 | + for (int i = 0; i < rs.getLength(); i++) { |
| 142 | + Element rel = (Element) rs.item(i); |
| 143 | + if (rid.equals(rel.getAttribute("Id"))) { |
| 144 | + return normalizePath(baseDir, rel.getAttribute("Target")); |
| 145 | + } |
| 146 | + } |
| 147 | + return null; |
| 148 | + } |
| 149 | + |
| 150 | + private static VmlReference parseVml(ZipFile zip, String vmlPath) throws IOException { |
| 151 | + Document vml = parseEntry(zip, vmlPath); |
| 152 | + if (vml == null) return null; |
| 153 | + |
| 154 | + NodeList shapes = vml.getElementsByTagNameNS(NS_VML, "shape"); |
| 155 | + for (int i = 0; i < shapes.getLength(); i++) { |
| 156 | + Element shape = (Element) shapes.item(i); |
| 157 | + NodeList imageData = shape.getElementsByTagNameNS(NS_VML, "imagedata"); |
| 158 | + if (imageData.getLength() == 0) continue; |
| 159 | + String relid = ((Element) imageData.item(0)).getAttributeNS(NS_VML_OFFICE, "relid"); |
| 160 | + if (relid.isEmpty()) continue; |
| 161 | + |
| 162 | + float[] dims = parseStyleDimensions(shape.getAttribute("style")); |
| 163 | + return new VmlReference(relid, dims[0], dims[1]); |
| 164 | + } |
| 165 | + return null; |
| 166 | + } |
| 167 | + |
| 168 | + private static float[] parseStyleDimensions(String style) { |
| 169 | + float width = 0f, height = 0f; |
| 170 | + Matcher m = VML_STYLE_DIMENSION.matcher(style == null ? "" : style); |
| 171 | + while (m.find()) { |
| 172 | + float value = Float.parseFloat(m.group(2)); |
| 173 | + if ("width".equalsIgnoreCase(m.group(1))) width = value; |
| 174 | + else height = value; |
| 175 | + } |
| 176 | + return new float[]{width, height}; |
| 177 | + } |
| 178 | + |
| 179 | + private void stamp(File pdfIn, Watermark watermark, File pdfOut) throws IOException { |
| 180 | + try (PDDocument doc = PDDocument.load(pdfIn)) { |
| 181 | + PDImageXObject image = PDImageXObject.createFromByteArray( |
| 182 | + doc, watermark.imageBytes, "watermark"); |
| 183 | + for (PDPage page : doc.getPages()) { |
| 184 | + drawCentered(doc, page, image, watermark); |
| 185 | + } |
| 186 | + doc.save(pdfOut); |
| 187 | + } |
| 188 | + } |
| 189 | + |
| 190 | + private static void drawCentered(PDDocument doc, PDPage page, PDImageXObject image, |
| 191 | + Watermark watermark) throws IOException { |
| 192 | + PDRectangle box = page.getMediaBox(); |
| 193 | + float pageW = box.getWidth(); |
| 194 | + float pageH = box.getHeight(); |
| 195 | + float[] drawSize = drawSize(image, watermark, pageW, pageH); |
| 196 | + float x = (pageW - drawSize[0]) / 2f + box.getLowerLeftX(); |
| 197 | + float y = (pageH - drawSize[1]) / 2f + box.getLowerLeftY(); |
| 198 | + |
| 199 | + try (PDPageContentStream cs = new PDPageContentStream( |
| 200 | + doc, page, AppendMode.APPEND, true, true)) { |
| 201 | + cs.drawImage(image, x, y, drawSize[0], drawSize[1]); |
| 202 | + } |
| 203 | + } |
| 204 | + |
| 205 | + private static float[] drawSize(PDImageXObject image, Watermark watermark, |
| 206 | + float pageW, float pageH) { |
| 207 | + if (watermark.widthPt > 0f && watermark.heightPt > 0f) { |
| 208 | + return new float[]{watermark.widthPt, watermark.heightPt}; |
| 209 | + } |
| 210 | + float scale = Math.min( |
| 211 | + (pageW * SCALE_TO_FIT_RATIO) / image.getWidth(), |
| 212 | + (pageH * SCALE_TO_FIT_RATIO) / image.getHeight()); |
| 213 | + return new float[]{image.getWidth() * scale, image.getHeight() * scale}; |
| 214 | + } |
| 215 | + |
| 216 | + private static Document parseEntry(ZipFile zip, String path) throws IOException { |
| 217 | + byte[] bytes = readEntry(zip, path); |
| 218 | + if (bytes == null) return null; |
| 219 | + try { |
| 220 | + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
| 221 | + factory.setNamespaceAware(true); |
| 222 | + DocumentBuilder builder = factory.newDocumentBuilder(); |
| 223 | + try (InputStream in = new ByteArrayInputStream(bytes)) { |
| 224 | + return builder.parse(in); |
| 225 | + } |
| 226 | + } catch (Exception e) { |
| 227 | + throw new IOException("Failed to parse " + path + ": " + e.getMessage(), e); |
| 228 | + } |
| 229 | + } |
| 230 | + |
| 231 | + private static byte[] readEntry(ZipFile zip, String path) throws IOException { |
| 232 | + ZipEntry entry = zip.getEntry(path); |
| 233 | + if (entry == null) return null; |
| 234 | + try (InputStream in = zip.getInputStream(entry)) { |
| 235 | + return readAllBytes(in); |
| 236 | + } |
| 237 | + } |
| 238 | + |
| 239 | + private static byte[] readAllBytes(InputStream in) throws IOException { |
| 240 | + java.io.ByteArrayOutputStream buf = new java.io.ByteArrayOutputStream(); |
| 241 | + byte[] chunk = new byte[8192]; |
| 242 | + int n; |
| 243 | + while ((n = in.read(chunk)) > 0) { |
| 244 | + buf.write(chunk, 0, n); |
| 245 | + } |
| 246 | + return buf.toByteArray(); |
| 247 | + } |
| 248 | + |
| 249 | + private static Element firstChild(Element parent, String ns, String localName) { |
| 250 | + NodeList children = parent.getChildNodes(); |
| 251 | + for (int i = 0; i < children.getLength(); i++) { |
| 252 | + Node n = children.item(i); |
| 253 | + if (n.getNodeType() == Node.ELEMENT_NODE |
| 254 | + && ns.equals(n.getNamespaceURI()) |
| 255 | + && localName.equals(n.getLocalName())) { |
| 256 | + return (Element) n; |
| 257 | + } |
| 258 | + } |
| 259 | + return null; |
| 260 | + } |
| 261 | + |
| 262 | + private static String normalizePath(String baseDir, String target) { |
| 263 | + List<String> parts = new ArrayList<String>(); |
| 264 | + for (String segment : baseDir.split("/")) parts.add(segment); |
| 265 | + for (String segment : (target == null ? "" : target).split("/")) parts.add(segment); |
| 266 | + |
| 267 | + List<String> resolved = new ArrayList<String>(); |
| 268 | + for (String segment : parts) { |
| 269 | + if (segment.isEmpty() || ".".equals(segment)) continue; |
| 270 | + if ("..".equals(segment)) { |
| 271 | + if (!resolved.isEmpty()) resolved.remove(resolved.size() - 1); |
| 272 | + continue; |
| 273 | + } |
| 274 | + resolved.add(segment); |
| 275 | + } |
| 276 | + StringBuilder out = new StringBuilder(); |
| 277 | + for (int i = 0; i < resolved.size(); i++) { |
| 278 | + if (i > 0) out.append('/'); |
| 279 | + out.append(resolved.get(i)); |
| 280 | + } |
| 281 | + return out.toString(); |
| 282 | + } |
| 283 | + |
| 284 | + private static String posixDir(String path) { |
| 285 | + int slash = path.lastIndexOf('/'); |
| 286 | + return slash < 0 ? "" : path.substring(0, slash); |
| 287 | + } |
| 288 | + |
| 289 | + private static String filename(String path) { |
| 290 | + int slash = path.lastIndexOf('/'); |
| 291 | + return slash < 0 ? path : path.substring(slash + 1); |
| 292 | + } |
| 293 | + |
| 294 | + private static String stampedFilename(File pdfIn) { |
| 295 | + String name = pdfIn.getName(); |
| 296 | + int dot = name.toLowerCase(Locale.ROOT).lastIndexOf(".pdf"); |
| 297 | + String stem = dot > 0 ? name.substring(0, dot) : name; |
| 298 | + return stem + "-wm.pdf"; |
| 299 | + } |
| 300 | + |
| 301 | + static final class Watermark { |
| 302 | + final byte[] imageBytes; |
| 303 | + final float widthPt; |
| 304 | + final float heightPt; |
| 305 | + |
| 306 | + Watermark(byte[] imageBytes, float widthPt, float heightPt) { |
| 307 | + this.imageBytes = imageBytes; |
| 308 | + this.widthPt = widthPt; |
| 309 | + this.heightPt = heightPt; |
| 310 | + } |
| 311 | + } |
| 312 | + |
| 313 | + private static final class VmlReference { |
| 314 | + final String relid; |
| 315 | + final float widthPt; |
| 316 | + final float heightPt; |
| 317 | + |
| 318 | + VmlReference(String relid, float widthPt, float heightPt) { |
| 319 | + this.relid = relid; |
| 320 | + this.widthPt = widthPt; |
| 321 | + this.heightPt = heightPt; |
| 322 | + } |
| 323 | + } |
| 324 | +} |
0 commit comments