Skip to content

Commit e24b174

Browse files
committed
Fix issue with Excel rendered watermarks not visualized in PDF screenshots
1 parent 17f94ce commit e24b174

5 files changed

Lines changed: 503 additions & 3 deletions

File tree

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<groupId>com.applitools.imagetester</groupId>
66
<artifactId>ImageTester</artifactId>
7-
<version>3.11.0</version>
7+
<version>3.11.1</version>
88
<packaging>jar</packaging>
99
<properties>
1010
<maven.compiler.source>1.8</maven.compiler.source>

src/main/java/com/applitools/imagetester/ImageTester.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import com.applitools.imagetester.lib.Utils;
3030

3131
public class ImageTester {
32-
private static final String cur_ver = "3.11.0";
32+
private static final String cur_ver = "3.11.1";
3333
private static final String DEFAULT_THREADS = String.valueOf(Runtime.getRuntime().availableProcessors() * 2);
3434

3535
public static void main(String[] args) {

src/main/java/com/applitools/imagetester/lib/converters/LibreOfficeConverter.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,22 @@ public class LibreOfficeConverter implements FormatConverter {
1616

1717
private final LibreOfficeLocator locator;
1818
private final ProcessRunner runner;
19+
private final XlsxWatermarkStamper watermarkStamper;
1920
private volatile Path userProfileDir;
2021

2122
public LibreOfficeConverter() {
22-
this(new LibreOfficeLocator(), ProcessRunner.forPlatform());
23+
this(new LibreOfficeLocator(), ProcessRunner.forPlatform(), new XlsxWatermarkStamper());
2324
}
2425

2526
LibreOfficeConverter(LibreOfficeLocator locator, ProcessRunner runner) {
27+
this(locator, runner, new XlsxWatermarkStamper());
28+
}
29+
30+
LibreOfficeConverter(LibreOfficeLocator locator, ProcessRunner runner,
31+
XlsxWatermarkStamper watermarkStamper) {
2632
this.locator = locator;
2733
this.runner = runner;
34+
this.watermarkStamper = watermarkStamper;
2835
}
2936

3037
@Override
@@ -68,6 +75,9 @@ public File convertToPdf(File file, Path tempDir) throws SkippedFileException, I
6875
throw new SkippedFileException(file,
6976
"soffice produced no output pdf for " + file.getName());
7077
}
78+
if (Patterns.SPREADSHEET.matcher(file.getName()).matches()) {
79+
return watermarkStamper.stampIfPresent(file, produced, tempDir);
80+
}
7181
return produced;
7282
}
7383

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
package com.applitools.imagetester.lib.converters;
2+
3+
import org.apache.pdfbox.pdmodel.PDDocument;
4+
import org.apache.pdfbox.pdmodel.PDPage;
5+
import org.apache.pdfbox.pdmodel.PDPageContentStream;
6+
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
7+
import org.apache.pdfbox.pdmodel.common.PDRectangle;
8+
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
9+
import org.w3c.dom.Document;
10+
import org.w3c.dom.Element;
11+
import org.w3c.dom.Node;
12+
import org.w3c.dom.NodeList;
13+
14+
import javax.xml.parsers.DocumentBuilder;
15+
import javax.xml.parsers.DocumentBuilderFactory;
16+
import java.io.ByteArrayInputStream;
17+
import java.io.File;
18+
import java.io.IOException;
19+
import java.io.InputStream;
20+
import java.nio.file.Path;
21+
import java.util.ArrayList;
22+
import java.util.Enumeration;
23+
import java.util.List;
24+
import java.util.Locale;
25+
import java.util.Optional;
26+
import java.util.regex.Matcher;
27+
import java.util.regex.Pattern;
28+
import java.util.zip.ZipEntry;
29+
import java.util.zip.ZipFile;
30+
31+
/**
32+
* Re-stamps Excel header/footer pictures onto a LibreOffice-produced PDF.
33+
*
34+
* LibreOffice's headless xlsx->pdf conversion silently drops VML header/footer
35+
* graphics (the legacy mechanism Excel uses for "watermarks"). We pull the
36+
* picture directly out of the .xlsx package and draw it on every page so the
37+
* downstream visual comparison sees what Excel would actually print.
38+
*/
39+
public class XlsxWatermarkStamper {
40+
41+
private static final String NS_RELATIONSHIPS =
42+
"http://schemas.openxmlformats.org/officeDocument/2006/relationships";
43+
private static final String NS_PACKAGE_RELS =
44+
"http://schemas.openxmlformats.org/package/2006/relationships";
45+
private static final String NS_SHEETML =
46+
"http://schemas.openxmlformats.org/spreadsheetml/2006/main";
47+
private static final String NS_VML = "urn:schemas-microsoft-com:vml";
48+
private static final String NS_VML_OFFICE = "urn:schemas-microsoft-com:office:office";
49+
50+
private static final String GRAPHIC_TOKEN = "&G";
51+
private static final String SHEETS_PREFIX = "xl/worksheets/";
52+
private static final String SHEET_SUFFIX = ".xml";
53+
private static final float SCALE_TO_FIT_RATIO = 0.75f;
54+
55+
private static final Pattern VML_STYLE_DIMENSION =
56+
Pattern.compile("(width|height)\\s*:\\s*([0-9.]+)\\s*pt", Pattern.CASE_INSENSITIVE);
57+
58+
public File stampIfPresent(File xlsx, File pdfIn, Path tempDir) throws IOException {
59+
Optional<Watermark> watermark = extractWatermark(xlsx);
60+
if (!watermark.isPresent()) return pdfIn;
61+
62+
File pdfOut = tempDir.resolve(stampedFilename(pdfIn)).toFile();
63+
stamp(pdfIn, watermark.get(), pdfOut);
64+
return pdfOut;
65+
}
66+
67+
Optional<Watermark> extractWatermark(File xlsx) throws IOException {
68+
try (ZipFile zip = new ZipFile(xlsx)) {
69+
for (String sheetPath : listSheets(zip)) {
70+
Optional<Watermark> found = resolveForSheet(zip, sheetPath);
71+
if (found.isPresent()) return found;
72+
}
73+
}
74+
return Optional.empty();
75+
}
76+
77+
private Optional<Watermark> resolveForSheet(ZipFile zip, String sheetPath) throws IOException {
78+
Document sheet = parseEntry(zip, sheetPath);
79+
if (sheet == null) return Optional.empty();
80+
81+
Element headerFooter = firstChild(sheet.getDocumentElement(), NS_SHEETML, "headerFooter");
82+
Element legacyHf = firstChild(sheet.getDocumentElement(), NS_SHEETML, "legacyDrawingHF");
83+
if (headerFooter == null || legacyHf == null || !hasGraphicToken(headerFooter)) {
84+
return Optional.empty();
85+
}
86+
87+
String legacyRid = legacyHf.getAttributeNS(NS_RELATIONSHIPS, "id");
88+
if (legacyRid.isEmpty()) return Optional.empty();
89+
90+
String sheetDir = posixDir(sheetPath);
91+
String sheetRels = sheetDir + "/_rels/" + filename(sheetPath) + ".rels";
92+
String vmlPath = resolveRelationship(zip, sheetRels, legacyRid, sheetDir);
93+
if (vmlPath == null) return Optional.empty();
94+
95+
VmlReference ref = parseVml(zip, vmlPath);
96+
if (ref == null) return Optional.empty();
97+
98+
String vmlDir = posixDir(vmlPath);
99+
String vmlRels = vmlDir + "/_rels/" + filename(vmlPath) + ".rels";
100+
String imagePath = resolveRelationship(zip, vmlRels, ref.relid, vmlDir);
101+
if (imagePath == null) return Optional.empty();
102+
103+
byte[] imageBytes = readEntry(zip, imagePath);
104+
if (imageBytes == null) return Optional.empty();
105+
106+
return Optional.of(new Watermark(imageBytes, ref.widthPt, ref.heightPt));
107+
}
108+
109+
private static List<String> listSheets(ZipFile zip) {
110+
List<String> out = new ArrayList<String>();
111+
Enumeration<? extends ZipEntry> entries = zip.entries();
112+
while (entries.hasMoreElements()) {
113+
String name = entries.nextElement().getName();
114+
if (name.startsWith(SHEETS_PREFIX) && name.endsWith(SHEET_SUFFIX)
115+
&& !name.contains("/_rels/")) {
116+
out.add(name);
117+
}
118+
}
119+
return out;
120+
}
121+
122+
private static boolean hasGraphicToken(Element headerFooter) {
123+
NodeList children = headerFooter.getChildNodes();
124+
for (int i = 0; i < children.getLength(); i++) {
125+
Node child = children.item(i);
126+
if (child.getNodeType() == Node.ELEMENT_NODE
127+
&& child.getTextContent() != null
128+
&& child.getTextContent().contains(GRAPHIC_TOKEN)) {
129+
return true;
130+
}
131+
}
132+
return false;
133+
}
134+
135+
private static String resolveRelationship(ZipFile zip, String relsPath, String rid, String baseDir)
136+
throws IOException {
137+
Document rels = parseEntry(zip, relsPath);
138+
if (rels == null) return null;
139+
140+
NodeList rs = rels.getDocumentElement().getElementsByTagNameNS(NS_PACKAGE_RELS, "Relationship");
141+
for (int i = 0; i < rs.getLength(); i++) {
142+
Element rel = (Element) rs.item(i);
143+
if (rid.equals(rel.getAttribute("Id"))) {
144+
return normalizePath(baseDir, rel.getAttribute("Target"));
145+
}
146+
}
147+
return null;
148+
}
149+
150+
private static VmlReference parseVml(ZipFile zip, String vmlPath) throws IOException {
151+
Document vml = parseEntry(zip, vmlPath);
152+
if (vml == null) return null;
153+
154+
NodeList shapes = vml.getElementsByTagNameNS(NS_VML, "shape");
155+
for (int i = 0; i < shapes.getLength(); i++) {
156+
Element shape = (Element) shapes.item(i);
157+
NodeList imageData = shape.getElementsByTagNameNS(NS_VML, "imagedata");
158+
if (imageData.getLength() == 0) continue;
159+
String relid = ((Element) imageData.item(0)).getAttributeNS(NS_VML_OFFICE, "relid");
160+
if (relid.isEmpty()) continue;
161+
162+
float[] dims = parseStyleDimensions(shape.getAttribute("style"));
163+
return new VmlReference(relid, dims[0], dims[1]);
164+
}
165+
return null;
166+
}
167+
168+
private static float[] parseStyleDimensions(String style) {
169+
float width = 0f, height = 0f;
170+
Matcher m = VML_STYLE_DIMENSION.matcher(style == null ? "" : style);
171+
while (m.find()) {
172+
float value = Float.parseFloat(m.group(2));
173+
if ("width".equalsIgnoreCase(m.group(1))) width = value;
174+
else height = value;
175+
}
176+
return new float[]{width, height};
177+
}
178+
179+
private void stamp(File pdfIn, Watermark watermark, File pdfOut) throws IOException {
180+
try (PDDocument doc = PDDocument.load(pdfIn)) {
181+
PDImageXObject image = PDImageXObject.createFromByteArray(
182+
doc, watermark.imageBytes, "watermark");
183+
for (PDPage page : doc.getPages()) {
184+
drawCentered(doc, page, image, watermark);
185+
}
186+
doc.save(pdfOut);
187+
}
188+
}
189+
190+
private static void drawCentered(PDDocument doc, PDPage page, PDImageXObject image,
191+
Watermark watermark) throws IOException {
192+
PDRectangle box = page.getMediaBox();
193+
float pageW = box.getWidth();
194+
float pageH = box.getHeight();
195+
float[] drawSize = drawSize(image, watermark, pageW, pageH);
196+
float x = (pageW - drawSize[0]) / 2f + box.getLowerLeftX();
197+
float y = (pageH - drawSize[1]) / 2f + box.getLowerLeftY();
198+
199+
try (PDPageContentStream cs = new PDPageContentStream(
200+
doc, page, AppendMode.APPEND, true, true)) {
201+
cs.drawImage(image, x, y, drawSize[0], drawSize[1]);
202+
}
203+
}
204+
205+
private static float[] drawSize(PDImageXObject image, Watermark watermark,
206+
float pageW, float pageH) {
207+
if (watermark.widthPt > 0f && watermark.heightPt > 0f) {
208+
return new float[]{watermark.widthPt, watermark.heightPt};
209+
}
210+
float scale = Math.min(
211+
(pageW * SCALE_TO_FIT_RATIO) / image.getWidth(),
212+
(pageH * SCALE_TO_FIT_RATIO) / image.getHeight());
213+
return new float[]{image.getWidth() * scale, image.getHeight() * scale};
214+
}
215+
216+
private static Document parseEntry(ZipFile zip, String path) throws IOException {
217+
byte[] bytes = readEntry(zip, path);
218+
if (bytes == null) return null;
219+
try {
220+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
221+
factory.setNamespaceAware(true);
222+
DocumentBuilder builder = factory.newDocumentBuilder();
223+
try (InputStream in = new ByteArrayInputStream(bytes)) {
224+
return builder.parse(in);
225+
}
226+
} catch (Exception e) {
227+
throw new IOException("Failed to parse " + path + ": " + e.getMessage(), e);
228+
}
229+
}
230+
231+
private static byte[] readEntry(ZipFile zip, String path) throws IOException {
232+
ZipEntry entry = zip.getEntry(path);
233+
if (entry == null) return null;
234+
try (InputStream in = zip.getInputStream(entry)) {
235+
return readAllBytes(in);
236+
}
237+
}
238+
239+
private static byte[] readAllBytes(InputStream in) throws IOException {
240+
java.io.ByteArrayOutputStream buf = new java.io.ByteArrayOutputStream();
241+
byte[] chunk = new byte[8192];
242+
int n;
243+
while ((n = in.read(chunk)) > 0) {
244+
buf.write(chunk, 0, n);
245+
}
246+
return buf.toByteArray();
247+
}
248+
249+
private static Element firstChild(Element parent, String ns, String localName) {
250+
NodeList children = parent.getChildNodes();
251+
for (int i = 0; i < children.getLength(); i++) {
252+
Node n = children.item(i);
253+
if (n.getNodeType() == Node.ELEMENT_NODE
254+
&& ns.equals(n.getNamespaceURI())
255+
&& localName.equals(n.getLocalName())) {
256+
return (Element) n;
257+
}
258+
}
259+
return null;
260+
}
261+
262+
private static String normalizePath(String baseDir, String target) {
263+
List<String> parts = new ArrayList<String>();
264+
for (String segment : baseDir.split("/")) parts.add(segment);
265+
for (String segment : (target == null ? "" : target).split("/")) parts.add(segment);
266+
267+
List<String> resolved = new ArrayList<String>();
268+
for (String segment : parts) {
269+
if (segment.isEmpty() || ".".equals(segment)) continue;
270+
if ("..".equals(segment)) {
271+
if (!resolved.isEmpty()) resolved.remove(resolved.size() - 1);
272+
continue;
273+
}
274+
resolved.add(segment);
275+
}
276+
StringBuilder out = new StringBuilder();
277+
for (int i = 0; i < resolved.size(); i++) {
278+
if (i > 0) out.append('/');
279+
out.append(resolved.get(i));
280+
}
281+
return out.toString();
282+
}
283+
284+
private static String posixDir(String path) {
285+
int slash = path.lastIndexOf('/');
286+
return slash < 0 ? "" : path.substring(0, slash);
287+
}
288+
289+
private static String filename(String path) {
290+
int slash = path.lastIndexOf('/');
291+
return slash < 0 ? path : path.substring(slash + 1);
292+
}
293+
294+
private static String stampedFilename(File pdfIn) {
295+
String name = pdfIn.getName();
296+
int dot = name.toLowerCase(Locale.ROOT).lastIndexOf(".pdf");
297+
String stem = dot > 0 ? name.substring(0, dot) : name;
298+
return stem + "-wm.pdf";
299+
}
300+
301+
static final class Watermark {
302+
final byte[] imageBytes;
303+
final float widthPt;
304+
final float heightPt;
305+
306+
Watermark(byte[] imageBytes, float widthPt, float heightPt) {
307+
this.imageBytes = imageBytes;
308+
this.widthPt = widthPt;
309+
this.heightPt = heightPt;
310+
}
311+
}
312+
313+
private static final class VmlReference {
314+
final String relid;
315+
final float widthPt;
316+
final float heightPt;
317+
318+
VmlReference(String relid, float widthPt, float heightPt) {
319+
this.relid = relid;
320+
this.widthPt = widthPt;
321+
this.heightPt = heightPt;
322+
}
323+
}
324+
}

0 commit comments

Comments
 (0)