2121import org .opendataloader .pdf .entities .SemanticPicture ;
2222import org .opendataloader .pdf .markdown .MarkdownSyntax ;
2323import org .opendataloader .pdf .utils .Base64ImageUtils ;
24+ import org .opendataloader .pdf .utils .GeneratorUtils ;
2425import org .opendataloader .pdf .utils .ImagesUtils ;
2526import org .verapdf .wcag .algorithms .entities .IObject ;
2627import org .verapdf .wcag .algorithms .entities .SemanticHeaderOrFooter ;
3435import org .verapdf .wcag .algorithms .entities .tables .tableBorders .TableBorderCell ;
3536import org .verapdf .wcag .algorithms .entities .tables .tableBorders .TableBorderRow ;
3637import org .verapdf .wcag .algorithms .semanticalgorithms .containers .StaticContainers ;
37- import org .verapdf .wcag .algorithms .semanticalgorithms .utils .TextChunkUtils ;
3838
3939import java .io .Closeable ;
4040import java .io .File ;
@@ -75,6 +75,8 @@ public class HtmlGenerator implements Closeable {
7575 protected String imageFormat = Config .IMAGE_FORMAT_PNG ;
7676 /** Whether to include page headers and footers in output. */
7777 protected boolean includeHeaderFooter = false ;
78+ protected static final String strikethroughTextOpeningTag = "<del>" ;
79+ protected static final String strikethroughTextClosingTag = "</del>" ;
7880
7981 /**
8082 * Creates a new HtmlGenerator for the specified PDF file.
@@ -287,9 +289,8 @@ protected void writeList(PDFList list) throws IOException {
287289 htmlWriter .write (HtmlSyntax .HTML_LIST_ITEM_TAG );
288290
289291 htmlWriter .write (HtmlSyntax .HTML_PARAGRAPH_TAG );
290- StringBuilder stringBuilder = new StringBuilder ();
291- getTextFromLines (item .getLines (), stringBuilder );
292- htmlWriter .write (getCorrectString (stringBuilder .toString ()));
292+ String value = GeneratorUtils .getTextFromLines (item .getLines (), strikethroughTextOpeningTag , strikethroughTextClosingTag );
293+ htmlWriter .write (getCorrectString (value ));
293294 htmlWriter .write (HtmlSyntax .HTML_PARAGRAPH_CLOSE_TAG );
294295
295296 for (IObject object : item .getContents ()) {
@@ -310,40 +311,11 @@ protected void writeList(PDFList list) throws IOException {
310311 */
311312 protected void writeSemanticTextNode (SemanticTextNode textNode ) throws IOException {
312313 htmlWriter .write (HtmlSyntax .HTML_FIGURE_CAPTION_TAG );
313- htmlWriter .write (getCorrectString (getTextFromColumns (textNode )));
314+ htmlWriter .write (getCorrectString (GeneratorUtils . getTextFromTextNode (textNode , strikethroughTextOpeningTag , strikethroughTextClosingTag )));
314315 htmlWriter .write (HtmlSyntax .HTML_FIGURE_CAPTION_CLOSE_TAG );
315316 htmlWriter .write (HtmlSyntax .HTML_LINE_BREAK );
316317 }
317318
318- protected void getTextFromLines (List <TextLine > textLines , StringBuilder stringBuilder ) {
319- for (int i = 0 ; i < textLines .size () - 1 ; i ++) {
320- TextLine line = textLines .get (i );
321- getTextFromLine (line , stringBuilder );
322- TextChunkUtils .formatLineEnd (stringBuilder );
323- }
324- getTextFromLine (textLines .get (textLines .size () - 1 ), stringBuilder );
325- }
326-
327- protected void getTextFromLine (TextLine line , StringBuilder stringBuilder ) {
328- for (TextChunk chunk : line .getTextChunks ()) {
329- if (chunk .getIsStrikethroughText ()) {
330- stringBuilder .append ("<del>" ).append (chunk .getValue ()).append ("</del>" );
331- } else {
332- stringBuilder .append (chunk .getValue ());
333- }
334- }
335- }
336-
337- protected String getTextFromColumns (SemanticTextNode node ) {
338- StringBuilder stringBuilder = new StringBuilder ();
339- for (TextColumn column : node .getColumns ()) {
340- for (TextBlock block : column .getBlocks ()) {
341- getTextFromLines (block .getLines (), stringBuilder );
342- }
343- }
344- return stringBuilder .toString ();
345- }
346-
347319 /**
348320 * Writes a table element to the HTML output.
349321 *
@@ -400,7 +372,7 @@ protected void writeParagraph(SemanticParagraph paragraph) throws IOException {
400372 if (paragraphIndent > 0 ) {
401373 htmlWriter .write (HtmlSyntax .HTML_INDENT );
402374 }
403- String paragraphValue = getTextFromColumns (paragraph );
375+ String paragraphValue = GeneratorUtils . getTextFromTextNode (paragraph , strikethroughTextOpeningTag , strikethroughTextClosingTag );
404376
405377 if (isInsideTable () && StaticContainers .isKeepLineBreaks ()) {
406378 paragraphValue = paragraphValue .replace (HtmlSyntax .HTML_LINE_BREAK , HtmlSyntax .HTML_LINE_BREAK_TAG );
@@ -420,7 +392,7 @@ protected void writeParagraph(SemanticParagraph paragraph) throws IOException {
420392 protected void writeHeading (SemanticHeading heading ) throws IOException {
421393 int headingLevel = Math .min (6 , Math .max (1 , heading .getHeadingLevel ()));
422394 htmlWriter .write ("<h" + headingLevel + ">" );
423- htmlWriter .write (getCorrectString (getTextFromColumns (heading )));
395+ htmlWriter .write (getCorrectString (GeneratorUtils . getTextFromTextNode (heading , strikethroughTextOpeningTag , strikethroughTextClosingTag )));
424396 htmlWriter .write ("</h" + headingLevel + ">" );
425397 htmlWriter .write (HtmlSyntax .HTML_LINE_BREAK );
426398 }
0 commit comments