|
| 1 | +package com.applitools.imagetester.lib; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.io.OutputStream; |
| 5 | +import java.util.ArrayDeque; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.Arrays; |
| 8 | +import java.util.Deque; |
| 9 | +import java.util.HashSet; |
| 10 | +import java.util.List; |
| 11 | +import java.util.Set; |
| 12 | + |
| 13 | +import org.apache.pdfbox.contentstream.operator.Operator; |
| 14 | +import org.apache.pdfbox.cos.COSName; |
| 15 | +import org.apache.pdfbox.cos.COSStream; |
| 16 | +import org.apache.pdfbox.pdfparser.PDFStreamParser; |
| 17 | +import org.apache.pdfbox.pdfwriter.ContentStreamWriter; |
| 18 | +import org.apache.pdfbox.pdmodel.PDDocument; |
| 19 | +import org.apache.pdfbox.pdmodel.PDPage; |
| 20 | + |
| 21 | +/** |
| 22 | + * Removes filled vector paths drawn in a target non-stroking (fill) color, |
| 23 | + * leaving everything else — body text, images, strokes, clip paths, and fills |
| 24 | + * of any other color — untouched. |
| 25 | + * |
| 26 | + * Watermarks of the kind this targets are stamped as filled outlines in a |
| 27 | + * single muted color distinct from the document's real content, so keying |
| 28 | + * removal on fill color strips the watermark without touching shared branding. |
| 29 | + */ |
| 30 | +public final class ColorPathStripper { |
| 31 | + |
| 32 | + private static final Set<String> PATH_CONSTRUCTION_OPS = new HashSet<>(Arrays.asList( |
| 33 | + "m", "l", "c", "v", "y", "h", "re", "W", "W*")); |
| 34 | + private static final Set<String> FILL_PAINT_OPS = new HashSet<>(Arrays.asList( |
| 35 | + "f", "F", "f*", "b", "b*", "B", "B*")); |
| 36 | + private static final Set<String> NON_FILL_PAINT_OPS = new HashSet<>(Arrays.asList( |
| 37 | + "S", "s", "n")); |
| 38 | + |
| 39 | + private ColorPathStripper() { |
| 40 | + } |
| 41 | + |
| 42 | + public static void removeFromAllPages(PDDocument doc, float[] targetRgb, float tolerance) throws IOException { |
| 43 | + if (targetRgb == null) return; |
| 44 | + for (int i = 0; i < doc.getNumberOfPages(); i++) { |
| 45 | + PDPage page = doc.getPage(i); |
| 46 | + PDFStreamParser parser = new PDFStreamParser(page); |
| 47 | + parser.parse(); |
| 48 | + List<Object> cleaned = strip(parser.getTokens(), targetRgb, tolerance); |
| 49 | + |
| 50 | + COSStream newStream = new COSStream(); |
| 51 | + try (OutputStream out = newStream.createOutputStream()) { |
| 52 | + new ContentStreamWriter(out).writeTokens(cleaned); |
| 53 | + } |
| 54 | + page.getCOSObject().setItem(COSName.CONTENTS, newStream); |
| 55 | + } |
| 56 | + } |
| 57 | + |
| 58 | + public static List<Object> strip(List<Object> tokens, float[] targetRgb, float tolerance) { |
| 59 | + List<Object> result = new ArrayList<>(); |
| 60 | + List<Object> argBuffer = new ArrayList<>(); |
| 61 | + List<Object> currentPath = new ArrayList<>(); |
| 62 | + boolean inPath = false; |
| 63 | + |
| 64 | + Deque<float[]> stateStack = new ArrayDeque<>(); |
| 65 | + float[] fill = {0f, 0f, 0f}; |
| 66 | + |
| 67 | + for (Object t : tokens) { |
| 68 | + if (!(t instanceof Operator)) { |
| 69 | + argBuffer.add(t); |
| 70 | + continue; |
| 71 | + } |
| 72 | + String op = ((Operator) t).getName(); |
| 73 | + |
| 74 | + if (PATH_CONSTRUCTION_OPS.contains(op)) { |
| 75 | + currentPath.addAll(argBuffer); |
| 76 | + currentPath.add(t); |
| 77 | + argBuffer.clear(); |
| 78 | + inPath = true; |
| 79 | + } else if (inPath && (FILL_PAINT_OPS.contains(op) || NON_FILL_PAINT_OPS.contains(op))) { |
| 80 | + currentPath.addAll(argBuffer); |
| 81 | + currentPath.add(t); |
| 82 | + argBuffer.clear(); |
| 83 | + boolean drop = FILL_PAINT_OPS.contains(op) && colorMatches(fill, targetRgb, tolerance); |
| 84 | + if (!drop) result.addAll(currentPath); |
| 85 | + currentPath.clear(); |
| 86 | + inPath = false; |
| 87 | + } else { |
| 88 | + if (inPath) { |
| 89 | + result.addAll(currentPath); |
| 90 | + currentPath.clear(); |
| 91 | + inPath = false; |
| 92 | + } |
| 93 | + fill = applyColorState(op, argBuffer, fill, stateStack); |
| 94 | + result.addAll(argBuffer); |
| 95 | + result.add(t); |
| 96 | + argBuffer.clear(); |
| 97 | + } |
| 98 | + } |
| 99 | + result.addAll(currentPath); |
| 100 | + result.addAll(argBuffer); |
| 101 | + return result; |
| 102 | + } |
| 103 | + |
| 104 | + private static float[] applyColorState(String op, List<Object> args, float[] fill, Deque<float[]> stack) { |
| 105 | + switch (op) { |
| 106 | + case "q": |
| 107 | + stack.push(fill.clone()); |
| 108 | + return fill; |
| 109 | + case "Q": |
| 110 | + return stack.isEmpty() ? fill : stack.pop(); |
| 111 | + default: |
| 112 | + return DeviceColor.fromOperator(op, args, fill); |
| 113 | + } |
| 114 | + } |
| 115 | + |
| 116 | + private static boolean colorMatches(float[] fill, float[] target, float tol) { |
| 117 | + if (target == null) return false; |
| 118 | + for (int i = 0; i < 3; i++) { |
| 119 | + if (Math.abs(fill[i] - target[i]) > tol) return false; |
| 120 | + } |
| 121 | + return true; |
| 122 | + } |
| 123 | +} |
0 commit comments