|
32 | 32 |
|
33 | 33 | public class HeaderFooterProcessorTest { |
34 | 34 |
|
35 | | - @Test |
36 | | - public void testProcessHeadersAndFooters() { |
| 35 | + private void initContainers() { |
37 | 36 | StaticContainers.setIsDataLoader(true); |
38 | 37 | StaticContainers.setIsIgnoreCharactersWithoutUnicode(false); |
39 | 38 | StaticResources.setDocument(null); |
40 | | - StaticLayoutContainers.setCurrentContentId(0); |
| 39 | + StaticLayoutContainers.clearContainers(); |
| 40 | + } |
| 41 | + |
| 42 | + @Test |
| 43 | + public void testProcessHeadersAndFooters() { |
| 44 | + initContainers(); |
41 | 45 | List<List<IObject>> contents = new ArrayList<>(); |
42 | 46 | List<IObject> page1Contents = new ArrayList<>(); |
43 | 47 | page1Contents.add(new TextLine(new TextChunk(new BoundingBox(0, 10.0, 30.0, 20.0, 40.0), |
@@ -69,4 +73,87 @@ public void testProcessHeadersAndFooters() { |
69 | 73 | Assertions.assertTrue(contents.get(1).get(2) instanceof SemanticHeaderOrFooter); |
70 | 74 | Assertions.assertEquals(SemanticType.FOOTER, ((SemanticHeaderOrFooter) contents.get(1).get(2)).getSemanticType()); |
71 | 75 | } |
| 76 | + |
| 77 | + /** |
| 78 | + * Tests that body text repeated on adjacent pages is not absorbed into the footer. |
| 79 | + * Reproduces #385: pages 19-20 of the CERAGEM PDF carry the identical note text |
| 80 | + * "※ 출수 중 출수 버튼을 터치하면 출수가 정지됩니다." (Korean: "Touching the dispense button while water is dispensing stops the water.") at y=116, above the actual |
| 81 | + * footer at y=34 (this test approximates them as y=117 and y=35). The note was incorrectly classified as footer because it matched |
| 82 | + * across pages. The simulated page is 420 x 595 pt, i.e. A5-like (A4 would be 595 x 842 pt). |
| 83 | + */ |
| 84 | + @Test |
| 85 | + public void testRepeatedBodyTextNotAbsorbedIntoFooter() { |
| 86 | + initContainers(); |
| 87 | + // Simulate 4 pages (document pages 17-20, loop indices 0-3) with an A5-like height (595pt) |
| 88 | + // Assumed page bounding box: [0, 0, 420, 595]; no page box object is created in this test |
| 89 | + // Footer line at y=35 (bottom), body note at y=117 (well above footer) |
| 90 | + double pageHeight = 595.0; |
| 91 | + double footerY = 35.0; |
| 92 | + double bodyNoteY = 117.0; |
| 93 | + |
| 94 | + List<List<IObject>> contents = new ArrayList<>(); |
| 95 | + for (int page = 0; page < 4; page++) { |
| 96 | + List<IObject> pageContents = new ArrayList<>(); |
| 97 | + // Body heading near the top of the page; its text differs on every page |
| 98 | + pageContents.add(new TextLine(new TextChunk( |
| 99 | + new BoundingBox(page, 37.0, pageHeight - 60, 300.0, pageHeight - 30), |
| 100 | + "Section " + (page + 1), 12, pageHeight - 30))); |
| 101 | + // Body paragraph in the middle of the page; its text differs on every page |
| 102 | + pageContents.add(new TextLine(new TextChunk( |
| 103 | + new BoundingBox(page, 37.0, pageHeight / 2, 300.0, pageHeight / 2 + 30), |
| 104 | + "Body content page " + (page + 1), 10, pageHeight / 2 + 30))); |
| 105 | + |
| 106 | + // Repeated body note — same text at loop indices 2 and 3 (simulating document pages 19-20) |
| 107 | + if (page == 2 || page == 3) { |
| 108 | + pageContents.add(new TextLine(new TextChunk( |
| 109 | + new BoundingBox(page, 223.0, bodyNoteY, 360.0, bodyNoteY + 18), |
| 110 | + "※ Repeated note text", 6.5, bodyNoteY + 18))); |
| 111 | + } |
| 112 | + |
| 113 | + // Actual footer line on every page; wording alternates between even and odd indices, page number is index + 17 |
| 114 | + String footerText = (page % 2 == 0) |
| 115 | + ? "CGM BALANCE " + (page + 17) |
| 116 | + : (page + 17) + " CERAGEM BALANCE USER MANUAL"; |
| 117 | + pageContents.add(new TextLine(new TextChunk( |
| 118 | + new BoundingBox(page, 37.0, footerY, 280.0, footerY + 9), |
| 119 | + footerText, 7.5, footerY + 9))); |
| 120 | + |
| 121 | + contents.add(pageContents); |
| 122 | + } |
| 123 | + |
| 124 | + HeaderFooterProcessor.processHeadersAndFooters(contents, false); |
| 125 | + |
| 126 | + // Verify: the last element of every page should be a detected footer |
| 127 | + for (int page = 0; page < 4; page++) { |
| 128 | + List<IObject> pageContent = contents.get(page); |
| 129 | + IObject lastElement = pageContent.get(pageContent.size() - 1); |
| 130 | + Assertions.assertTrue(lastElement instanceof SemanticHeaderOrFooter, |
| 131 | + "Page " + page + ": last element should be footer"); |
| 132 | + SemanticHeaderOrFooter footer = (SemanticHeaderOrFooter) lastElement; |
| 133 | + Assertions.assertEquals(SemanticType.FOOTER, footer.getSemanticType()); |
| 134 | + |
| 135 | + // Critical: footer should contain only 1 element (the actual footer line), |
| 136 | + // NOT the repeated body note (this is the regression check for #385) |
| 137 | + Assertions.assertEquals(1, footer.getContents().size(), |
| 138 | + "Page " + page + ": footer should contain only the footer line, " + |
| 139 | + "not absorb the repeated body note. Got " + footer.getContents().size() + " elements."); |
| 140 | + } |
| 141 | + |
| 142 | + // Verify: the repeated note at loop indices 2-3 should still be a top-level TextLine in the page content |
| 143 | + for (int page = 2; page <= 3; page++) { |
| 144 | + List<IObject> pageContent = contents.get(page); |
| 145 | + boolean foundNote = false; |
| 146 | + for (IObject obj : pageContent) { |
| 147 | + if (!(obj instanceof SemanticHeaderOrFooter) && obj instanceof TextLine) { |
| 148 | + TextLine line = (TextLine) obj; |
| 149 | + if (line.getValue().contains("Repeated note")) { |
| 150 | + foundNote = true; |
| 151 | + break; |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + Assertions.assertTrue(foundNote, |
| 156 | + "Page " + page + ": repeated note text should remain in body, not be absorbed into footer"); |
| 157 | + } |
| 158 | + } |
72 | 159 | } |
0 commit comments