Skip to content

Commit 7a1be87

Browse files
authored
Merge branch '9481-pdf-codebook' into develop
2 parents a7df7d8 + 685bc8b commit 7a1be87

6 files changed

Lines changed: 4614 additions & 1 deletion

File tree

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,12 @@
456456
<artifactId>commons-compress</artifactId>
457457
<!-- no version here as managed by <dependencyManagement> above! -->
458458
</dependency>
459+
<!-- https://mvnrepository.com/artifact/org.apache.xmlgraphics/fop -->
460+
<dependency>
461+
<groupId>org.apache.xmlgraphics</groupId>
462+
<artifactId>fop</artifactId>
463+
<version>2.8</version>
464+
</dependency>
459465
<dependency>
460466
<groupId>org.duracloud</groupId>
461467
<artifactId>common</artifactId>

src/main/java/edu/harvard/iq/dataverse/api/Datasets.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id"
272272

273273
@GET
274274
@Path("/export")
275-
@Produces({"application/xml", "application/json", "application/html" })
275+
@Produces({"application/xml", "application/json", "application/html", "*/*" })
276276
public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
277277

278278
try {
Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,83 @@
1+
2+
3+
package edu.harvard.iq.dataverse.export;
4+
5+
import com.google.auto.service.AutoService;
6+
import edu.harvard.iq.dataverse.Dataset;
7+
import edu.harvard.iq.dataverse.DatasetVersion;
8+
import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil;
9+
import io.gdcc.spi.export.ExportDataProvider;
10+
import io.gdcc.spi.export.ExportException;
11+
import io.gdcc.spi.export.Exporter;
12+
import edu.harvard.iq.dataverse.util.BundleUtil;
13+
14+
import javax.json.JsonObject;
15+
import javax.ws.rs.core.MediaType;
16+
import javax.xml.stream.XMLStreamException;
17+
import java.io.File;
18+
import java.io.IOException;
19+
import java.io.InputStream;
20+
import java.io.OutputStream;
21+
import java.nio.file.Path;
22+
import java.nio.file.Paths;
23+
import java.util.Locale;
24+
import java.util.Optional;
25+
26+
@AutoService(Exporter.class)
27+
public class PdfCodeBookExporter implements Exporter {
28+
29+
@Override
30+
public String getFormatName() {
31+
return "pdf";
32+
}
33+
34+
@Override
35+
public String getDisplayName(Locale locale) {
36+
String displayName = BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf", locale);
37+
return Optional.ofNullable(displayName).orElse("DDI pdf codebook");
38+
}
39+
40+
@Override
41+
public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
42+
Optional<InputStream> ddiInputStreamOptional = dataProvider.getPrerequisiteInputStream();
43+
if (ddiInputStreamOptional.isPresent()) {
44+
try (InputStream ddiInputStream = ddiInputStreamOptional.get()) {
45+
DdiExportUtil.datasetPdfDDI(ddiInputStream, outputStream);
46+
} catch (IOException e) {
47+
throw new ExportException("Cannot open export_ddi cached file");
48+
} catch (XMLStreamException xse) {
49+
throw new ExportException("Caught XMLStreamException performing DDI export");
50+
}
51+
} else {
52+
throw new ExportException("No prerequisite input stream found");
53+
}
54+
}
55+
56+
@Override
57+
public Boolean isHarvestable() {
58+
// No, we don't want this format to be harvested!
59+
// For datasets with tabular data the <data> portions of the DDIs
60+
// become huge and expensive to parse; even as they don't contain any
61+
// metadata useful to remote harvesters. -- L.A. 4.5
62+
return false;
63+
}
64+
65+
@Override
66+
public Boolean isAvailableToUsers() {
67+
return true;
68+
}
69+
70+
@Override
71+
public Optional<String> getPrerequisiteFormatName() {
72+
//This exporter relies on being able to get the output of the ddi exporter
73+
return Optional.of("ddi");
74+
}
75+
76+
@Override
77+
public String getMediaType() {
78+
return MediaType.WILDCARD;
79+
};
80+
}
81+
82+
83+

src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
import java.io.ByteArrayOutputStream;
2828
import java.io.IOException;
2929
import java.io.OutputStream;
30+
import java.net.URL;
3031
import java.nio.file.Files;
3132
import java.nio.file.Paths;
3233
import java.time.LocalDate;
@@ -62,6 +63,21 @@
6263
import javax.xml.transform.stream.StreamResult;
6364
import java.io.InputStream;
6465

66+
67+
import java.io.OutputStream;
68+
import javax.xml.transform.Result;
69+
import javax.xml.transform.Source;
70+
import javax.xml.transform.Transformer;
71+
import javax.xml.transform.TransformerFactory;
72+
import javax.xml.transform.sax.SAXResult;
73+
import javax.xml.transform.stream.StreamSource;
74+
75+
import org.apache.fop.apps.FOUserAgent;
76+
import org.apache.fop.apps.Fop;
77+
import org.apache.fop.apps.FopFactory;
78+
import org.apache.fop.apps.MimeConstants;
79+
80+
6581
public class DdiExportUtil {
6682

6783
private static final Logger logger = Logger.getLogger(DdiExportUtil.class.getCanonicalName());
@@ -2090,6 +2106,40 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa
20902106
return true;
20912107
}
20922108

2109+
public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException {
2110+
try {
2111+
InputStream styleSheetInput = DdiExportUtil.class.getClassLoader().getResourceAsStream("edu/harvard/iq/dataverse/ddi-to-fo.xsl");
2112+
2113+
final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
2114+
FOUserAgent foUserAgent = fopFactory.newFOUserAgent();
2115+
2116+
try {
2117+
Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, outputStream);
2118+
// Setup XSLT
2119+
TransformerFactory factory = TransformerFactory.newInstance();
2120+
Transformer transformer = factory.newTransformer(new StreamSource(styleSheetInput));
2121+
2122+
// Set the value of a <param> in the stylesheet
2123+
transformer.setParameter("versionParam", "2.0");
2124+
2125+
// Setup input for XSLT transformation
2126+
Source src = new StreamSource(datafile);
2127+
2128+
// Resulting SAX events (the generated FO) must be piped through to FOP
2129+
Result res = new SAXResult(fop.getDefaultHandler());
2130+
2131+
// Start XSLT transformation and FOP processing
2132+
transformer.transform(src, res);
2133+
2134+
} catch (Exception e) {
2135+
logger.severe(e.getMessage());
2136+
}
2137+
} catch (Exception e) {
2138+
logger.info("Second try");
2139+
logger.severe(e.getMessage());
2140+
}
2141+
}
2142+
20932143
public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException {
20942144
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
20952145

src/main/java/propertyFiles/Bundle.properties

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1401,6 +1401,7 @@ dataset.exportBtn.itemLabel.json=JSON
14011401
dataset.exportBtn.itemLabel.oai_ore=OAI_ORE
14021402
dataset.exportBtn.itemLabel.dataciteOpenAIRE=OpenAIRE
14031403
dataset.exportBtn.itemLabel.html=DDI HTML Codebook
1404+
dataset.exportBtn.itemLabel.pdf=DDI PDF Codebook
14041405
license.custom=Custom Dataset Terms
14051406
license.custom.description=Custom terms specific to this dataset
14061407
metrics.title=Metrics

0 commit comments

Comments
 (0)