11package org .opendataloader .pdf .processors ;
22
33import org .opendataloader .pdf .api .Config ;
4+ import org .opendataloader .pdf .entities .EnrichedImageChunk ;
45import org .verapdf .gf .model .impl .sa .GFSANode ;
56import org .verapdf .wcag .algorithms .entities .*;
67import org .verapdf .wcag .algorithms .entities .content .ImageChunk ;
@@ -35,7 +36,7 @@ public static List<List<IObject>> processDocument(String inputPdfName, Config co
3536 contents .add (new ArrayList <>());
3637 }
3738 ITree tree = StaticContainers .getDocument ().getTree ();
38- processStructElem (tree .getRoot ());
39+ processStructElem (tree .getRoot (), null );
3940 List <List <IObject >> artifacts = collectArtifacts (totalPages );
4041 for (int pageNumber = 0 ; pageNumber < totalPages ; pageNumber ++) {
4142 if (!shouldProcessPage (pageNumber )) {
@@ -92,17 +93,17 @@ private static boolean shouldProcessPage(int pageNumber) {
9293 return pagesToProcess == null || pagesToProcess .contains (pageNumber );
9394 }
9495
95- private static void processStructElem (INode node ) {
96+ private static void processStructElem (INode node , INode parent ) {
9697 if (node instanceof SemanticFigure ) {
97- processImage ((SemanticFigure ) node );
98+ processImage ((SemanticFigure ) node , parent );
9899 return ;
99100 }
100101 if (node instanceof SemanticSpan ) {
101102 processTextChunk ((SemanticSpan ) node );
102103 }
103104 if (node .getInitialSemanticType () == null ) {
104105 for (INode child : node .getChildren ()) {
105- processStructElem (child );
106+ processStructElem (child , node );
106107 }
107108 return ;
108109 }
@@ -133,7 +134,7 @@ private static void processStructElem(INode node) {
133134 break ;
134135 default :
135136 for (INode child : node .getChildren ()) {
136- processStructElem (child );
137+ processStructElem (child , node );
137138 }
138139 }
139140 }
@@ -194,7 +195,7 @@ private static void processList(INode node) {
194195 list .add (listItem );
195196 }
196197 } else {
197- processStructElem (child );
198+ processStructElem (child , node );
198199 }
199200 }
200201 addObjectToContent (list );
@@ -280,11 +281,11 @@ private static List<INode> processTableRows(INode table) {
280281 listTR .add (child );
281282 processTableRowsChildren (child );
282283 } else {
283- processStructElem (child );
284+ processStructElem (child , elem );
284285 }
285286 }
286287 } else {
287- processStructElem (elem );
288+ processStructElem (elem , table );
288289 }
289290 }
290291 return listTR ;
@@ -294,7 +295,7 @@ private static void processTableRowsChildren(INode tableRow) {
294295 for (INode tableCell : tableRow .getChildren ()) {
295296 SemanticType tableCellType = tableCell .getInitialSemanticType ();
296297 if (SemanticType .TABLE_CELL != tableCellType && SemanticType .TABLE_HEADER != tableCellType ) {
297- processStructElem (tableCell );
298+ processStructElem (tableCell , tableRow );
298299 }
299300 }
300301 }
@@ -338,7 +339,7 @@ private static void processTableCell(TableBorderCell cell, INode elem) {
338339 private static void processChildContents (INode elem , List <IObject > contents ) {
339340 contentsStack .add (contents );
340341 for (INode childChild : elem .getChildren ()) {
341- processStructElem (childChild );
342+ processStructElem (childChild , elem );
342343 }
343344 contentsStack .pop ();
344345 }
@@ -395,10 +396,13 @@ private static void processTOC(INode toc) {
395396
396397 }
397398
398- private static void processImage (SemanticFigure image ) {
399+ private static void processImage (SemanticFigure image , INode parent ) {
400+ GFSANode parentNode = (GFSANode ) parent ;
399401 List <ImageChunk > images = image .getImages ();
400402 if (!images .isEmpty ()) {
401- addObjectToContent (images .get (0 ));
403+ String alt = parentNode .getStructElem ().getStructElemDictionary ().getAlternateDescription ();
404+ ImageChunk imageChunk = images .get (0 );
405+ addObjectToContent (alt == null ? imageChunk : new EnrichedImageChunk (imageChunk , alt ));
402406 }
403407 }
404408
@@ -412,7 +416,7 @@ private static List<IObject> getContents(INode node) {
412416 if (child instanceof SemanticSpan ) {
413417 result .add (((SemanticSpan )child ).getColumns ().get (0 ).getFirstLine ().getFirstTextChunk ());
414418 } else if (child instanceof SemanticFigure ) {
415- processImage ((SemanticFigure )child );
419+ processImage ((SemanticFigure )child , node );
416420 } else {
417421 result .addAll (getContents (child ));
418422 }
0 commit comments