|
1 | | -import fs from "fs"; |
2 | | -import pdfParse from "pdf-parse"; |
| 1 | +import fs from 'fs'; |
| 2 | +import { PDFExtract } from 'pdf.js-extract'; |
| 3 | +import { PDFDocument, rgb } from 'pdf-lib'; |
| 4 | +import { ComparePdf } from "compare-pdf-plus"; |
| 5 | +import Allure from '@allure'; |
3 | 6 |
|
/** Text and bounding box of one text run on a PDF page, copied from the extractor's output. */
interface PdfTextItem {
  text: string;
  x: number;
  y: number;
  width: number;
  height: number;
}

/**
 * Static helpers for reading PDF text and for visually comparing two PDF files,
 * optionally blacking out ("masking") dynamic text before the comparison.
 */
export default class PDFUtil {
  private static readonly MASKED_PDF_DIR_BASELINE = './test-results/pdf/masked/baseline';
  private static readonly MASKED_PDF_DIR_ACTUAL = './test-results/pdf/masked/actual';
  private static readonly PNG_DIR_BASELINE = './test-results/pdf/png/baseline';
  private static readonly PNG_DIR_ACTUAL = './test-results/pdf/png/actual';
  private static readonly PNG_DIR_DIFF = './test-results/pdf/png/diff';

  /**
   * Gets the text, number of pages and info from a PDF file
   * @param pathToFileDirectory path to file directory
   * @param fileName name of the PDF file
   * @returns object with `pageCount`, `content` (text runs of a page joined by spaces, pages joined by newlines)
   * and `info` (the extractor's `meta.info`, or `undefined` when no metadata is present)
   */
  public static async getPdfTextDetails(pathToFileDirectory: string, fileName: string) {
    const filePath = `${pathToFileDirectory}/${fileName}`;
    const data = await new PDFExtract().extract(filePath, {});
    const fullText = data.pages
      .map(page => page.content.map(item => item.str).join(' '))
      .join('\n');
    return {
      pageCount: data.pages.length,
      content: fullText,
      // Optional chaining: a document without metadata yields `info: undefined` instead of a TypeError.
      info: data.meta?.info,
    };
  }

  /**
   * Extracts text positions from a specific page in a PDF file
   * @param pdfPath path to the PDF file
   * @param page page number (0-indexed)
   * @returns one entry per text run on the page, with its text and bounding box
   * @throws Error when `page` is not a valid index into the extracted pages
   */
  private static async extractTextPositions(pdfPath: string, page: number): Promise<PdfTextItem[]> {
    const data = await new PDFExtract().extract(pdfPath, {});
    const pageData = data.pages[page];
    if (!pageData) {
      throw new Error(`Page index ${page} is out of range for "${pdfPath}" (${data.pages.length} page(s))`);
    }
    return pageData.content.map(item => ({
      text: item.str,
      x: item.x,
      y: item.y,
      width: item.width,
      height: item.height,
    }));
  }

  /**
   * Finds text items that match any of the dynamic texts to be masked
   * @param textItems text runs of a page
   * @param dynamicTexts substrings to look for; empty strings are ignored
   * @returns the items whose text contains at least one dynamic text, each item at most once, in page order
   */
  private static findDynamicMatches(textItems: PdfTextItem[], dynamicTexts: string[]): PdfTextItem[] {
    // "".includes is always true, so an empty entry would select (and mask) every item on the page.
    const needles = dynamicTexts.filter(text => text.length > 0);
    // Filtering the items (rather than looping per needle) keeps an item from being returned twice.
    return textItems.filter(item => needles.some(needle => item.text.includes(needle)));
  }

  /**
   * Adjusts coordinates based on page rotation
   * @param x masked rectangle x coordinate
   * @param y masked rectangle y coordinate
   * @param w masked rectangle width
   * @param h masked rectangle height
   * @param pageWidth width of the PDF page
   * @param pageHeight height of the PDF page
   * @param rotation PDF page rotation in degrees; normalised into [0, 360) so 360 or -90 behave like 0 or 270
   * @returns rectangle (`x`, `y`, `width`, `height`) to draw on the page
   */
  private static transformCoordinates(
    x: number,
    y: number,
    w: number,
    h: number,
    pageWidth: number,
    pageHeight: number,
    rotation: number
  ) {
    const normalizedRotation = ((rotation % 360) + 360) % 360;
    // NOTE(review): the per-rotation formulas are kept exactly as written; verify them against rotated fixtures.
    switch (normalizedRotation) {
      case 0:
        // Flips the y axis — presumably extractor coordinates grow downwards while drawing coordinates grow upwards.
        return { x, y: pageHeight - y, width: w, height: h };
      case 90:
        return { x: y, y: x - h, width: h, height: w };
      case 180:
        return { x: pageWidth - x - w, y: y - h, width: w, height: h };
      case 270:
        return { x: pageHeight - y - h, y: pageWidth - x - w, width: h, height: w };
      default:
        // Not a multiple of 90 degrees: pass the rectangle through unchanged.
        return { x, y, width: w, height: h };
    }
  }

  /**
   * Builds the output name for a masked PDF: strips a trailing `.pdf` (any letter case)
   * and appends `_<timeStamp>.pdf`, so the result always carries the timestamp and a lowercase extension.
   * @param fileName original PDF file name
   * @param timeStamp value appended to the base name
   * @returns timestamped file name ending in `.pdf`
   */
  private static timestampedPdfName(fileName: string, timeStamp: number): string {
    const baseName = fileName.replace(/\.pdf$/i, '');
    return `${baseName}_${timeStamp}.pdf`;
  }

  /**
   * Masks the dynamic text in the specified pages of the baseline and actual PDF files and saves the masked copies.
   * Matches are located in the baseline file only; the same rectangles are drawn on both documents.
   * @param baselineDirPath path to the baseline PDF file directory
   * @param baselineFileName baseline PDF file name
   * @param actualDirPath path to the actual PDF file directory
   * @param actualFileName actual PDF file name
   * @param pageNumbers array of pages to be masked (0-indexed)
   * @param dynamicTexts array of text from baseline PDF file to be masked
   * @returns directories and file names of the masked baseline and actual PDFs
   */
  private static async maskPdf(
    baselineDirPath: string,
    baselineFileName: string,
    actualDirPath: string,
    actualFileName: string,
    pageNumbers: number[],
    dynamicTexts: string[]
  ) {
    const baselinePath = `${baselineDirPath}/${baselineFileName}`;
    const actualPath = `${actualDirPath}/${actualFileName}`;
    const baselinePdfDoc = await PDFDocument.load(fs.readFileSync(baselinePath));
    const actualPdfDoc = await PDFDocument.load(fs.readFileSync(actualPath));

    for (const pageNum of pageNumbers) {
      const textItems = await this.extractTextPositions(baselinePath, pageNum);
      const matchedItems = this.findDynamicMatches(textItems, dynamicTexts);
      const baselinePage = baselinePdfDoc.getPage(pageNum);
      const actualPage = actualPdfDoc.getPage(pageNum);
      const { width, height } = baselinePage.getSize();
      const rotation = baselinePage.getRotation().angle;

      for (const item of matchedItems) {
        const rect = this.transformCoordinates(item.x, item.y, item.width, item.height, width, height, rotation);
        // Opaque black box; the 8-unit border enlarges the covered area around the text run.
        const pdfRectOptions = {
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
          color: rgb(0, 0, 0),
          borderColor: rgb(0, 0, 0),
          borderWidth: 8,
          opacity: 1,
        };
        baselinePage.drawRectangle(pdfRectOptions);
        actualPage.drawRectangle(pdfRectOptions);
      }
    }

    const outputDirPathBaseline = this.MASKED_PDF_DIR_BASELINE;
    const outputDirPathActual = this.MASKED_PDF_DIR_ACTUAL;
    // `recursive: true` is a no-op for an existing directory, so no existence check is needed.
    fs.mkdirSync(outputDirPathBaseline, { recursive: true });
    fs.mkdirSync(outputDirPathActual, { recursive: true });

    const timeStamp = Date.now();
    const maskedBaselineFileName = this.timestampedPdfName(baselineFileName, timeStamp);
    const maskedActualFileName = this.timestampedPdfName(actualFileName, timeStamp);
    fs.writeFileSync(`${outputDirPathBaseline}/${maskedBaselineFileName}`, await baselinePdfDoc.save());
    fs.writeFileSync(`${outputDirPathActual}/${maskedActualFileName}`, await actualPdfDoc.save());

    return {
      maskedBaselineDir: outputDirPathBaseline,
      maskedBaselineFileName,
      maskedActualDir: outputDirPathActual,
      maskedActualFileName,
    };
  }

  /**
   * Compares the specified pages of the actual and baseline PDF files and returns the comparison results
   * @param actualPdfDir Path to the actual PDF file directory
   * @param actualPdfFileName Actual PDF file name
   * @param baselinePdfDir Path to the baseline PDF file directory
   * @param baselinePdfFileName Baseline PDF file name
   * @param pages Array of pages to be compared (0-indexed)
   * @param tolerance Pixel difference tolerance
   * @param threshold Similarity threshold (range is 0.00 to 1.00)
   * @returns the result object produced by the comparer
   */
  private static async comparePdf(
    actualPdfDir: string,
    actualPdfFileName: string,
    baselinePdfDir: string,
    baselinePdfFileName: string,
    pages: number[],
    tolerance: number,
    threshold: number
  ) {
    const comparer = new ComparePdf({
      paths: {
        actualPdfRootFolder: actualPdfDir,
        actualPngRootFolder: this.PNG_DIR_ACTUAL,
        baselinePdfRootFolder: baselinePdfDir,
        baselinePngRootFolder: this.PNG_DIR_BASELINE,
        diffPngRootFolder: this.PNG_DIR_DIFF,
      },
      settings: {
        imageEngine: 'native',
        density: 150,
        quality: 80,
        tolerance,
        threshold,
        cleanPngPaths: false,
        matchPageCount: true,
        disableFontFace: true,
      },
    });
    return comparer
      .actualPdfFile(actualPdfFileName)
      .baselinePdfFile(baselinePdfFileName)
      .onlyPageIndexes(pages)
      .compare();
  }

  /**
   * Converts a comma-separated list of 1-indexed page numbers into 0-indexed page indexes.
   * @param pageNumber e.g. "1,2,3"; whitespace around entries is ignored
   * @returns 0-indexed page indexes in the order given
   * @throws Error when an entry does not parse to an integer >= 1
   */
  private static parsePageIndexes(pageNumber: string): number[] {
    return pageNumber.split(',').map(token => {
      const trimmed = token.trim();
      const oneBased = parseInt(trimmed, 10);
      if (Number.isNaN(oneBased) || oneBased < 1) {
        throw new Error(
          `Invalid page number "${trimmed}" in "${pageNumber}": expected 1-indexed, comma-separated integers`
        );
      }
      return oneBased - 1;
    });
  }

  /**
   * Splits a pipe-separated list of mask texts, trimming each entry and dropping empty ones.
   * @param maskTexts e.g. "text1|text2"; an empty value means "mask nothing"
   * @returns the non-empty, trimmed mask texts
   */
  private static parseMaskTexts(maskTexts: string): string[] {
    if (!maskTexts) {
      return [];
    }
    return maskTexts
      .split('|')
      .map(text => text.trim())
      .filter(text => text.length > 0);
  }

  /**
   * Masks the dynamic text in the specified pages of the baseline and actual PDF files and compares them. If differences are found, attaches the actual, baseline, and diff files to the test report.
   * @param baselineDirPath Path to the baseline PDF file directory
   * @param baselineFileName Baseline PDF file name
   * @param actualDirPath Path to the actual PDF file directory
   * @param actualFileName Actual PDF file name
   * @param pageNumber Pages to be compared (1-indexed, comma-separated. E.g., "1,2,3")
   * @param maskTexts text from baseline PDF file to be masked (pipe-separated. E.g., "text1|text2")
   * @param tolerance Pixel difference tolerance (default is 0). Use 0 for strict comparison and higher values for more lenient comparison
   * @param threshold Similarity threshold (default is 0.00, range is 0.00 to 1.00). Use 0.00 for strict comparison and higher values for more lenient comparison
   * @returns `true` when the comparison status is 'passed', otherwise `false`
   * @throws Error when `pageNumber` contains an entry that is not an integer >= 1
   */
  public static async maskAndComparePdf(
    baselineDirPath: string,
    baselineFileName: string,
    actualDirPath: string,
    actualFileName: string,
    pageNumber: string,
    maskTexts: string,
    tolerance: number = 0,
    threshold: number = 0.00
  ): Promise<boolean> {
    const pageIndexes = this.parsePageIndexes(pageNumber);
    const dynamicTexts = this.parseMaskTexts(maskTexts);

    // Compare the original files unless masking produced replacement copies.
    let baselineDir = baselineDirPath;
    let baselineFile = baselineFileName;
    let actualDir = actualDirPath;
    let actualFile = actualFileName;
    if (dynamicTexts.length > 0) {
      const maskDetails = await this.maskPdf(
        baselineDirPath,
        baselineFileName,
        actualDirPath,
        actualFileName,
        pageIndexes,
        dynamicTexts
      );
      baselineDir = maskDetails.maskedBaselineDir;
      baselineFile = maskDetails.maskedBaselineFileName;
      actualDir = maskDetails.maskedActualDir;
      actualFile = maskDetails.maskedActualFileName;
    }

    const result = await this.comparePdf(actualDir, actualFile, baselineDir, baselineFile, pageIndexes, tolerance, threshold);
    const passed = result.status === 'passed';
    if (!passed) {
      await Allure.attachPDF('Actual PDF', `${actualDir}/${actualFile}`);
      await Allure.attachPDF('Baseline PDF', `${baselineDir}/${baselineFile}`);
      // NOTE(review): guards against a failed result that carries no per-page `details`
      // (e.g. a failure reported before page diffing) — confirm against the comparer's result shape.
      const details = result.details ?? [];
      for (let i = 0; i < details.length; i++) {
        await Allure.attachPNG(`PDF Diff${i + 1}`, details[i].diffPng);
      }
      console.log(`PDF comparison failed.\n ${JSON.stringify(result.details ?? result, null, 2)}`);
    }
    return passed;
  }
}
0 commit comments