Skip to content

Commit 076de3f

Browse files
committed
Handle hyperlinked wp:anchor and wp:inline elements
1 parent 49105a4 commit 076de3f

5 files changed

Lines changed: 119 additions & 17 deletions

File tree

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 1.12.0
2+
3+
* Handle hyperlinked wp:anchor and wp:inline elements.
4+
15
# 1.11.0
26

37
* Ignore style definitions using a style ID that has already been used.

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ For instance:
330330
automatically convert the document into HTML on the server,
331331
and embed the HTML into your website,
332332
this may allow arbitrary files on the server to be read and exfiltrated.
333+
333334
To avoid this issue, access to any such external files is disabled by default.
334335
To enable access when converting trusted source documents,
335336
call `enableExternalFileAccess()`.

src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -694,34 +694,52 @@ private ReadResult readImagedata(XmlElement element) {
694694

695695
/**
 * Reads the picture(s) contained in a drawing element such as wp:inline.
 *
 * Alternative text and an optional hyperlink target are taken from the
 * element's wp:docPr child; the a:blip elements nested under
 * a:graphic / a:graphicData / pic:pic / pic:blipFill are then handed to
 * readBlips.
 *
 * @param element the drawing element to read
 * @return whatever readBlips produces for the blips found in the element
 */
private ReadResult readInline(XmlElement element) {
696696
XmlElementLike properties = element.findChildOrEmpty("wp:docPr");
697+
697698
// Candidate alt texts: "descr" (ignored when blank after trimming), then "title".
// Presumably Optionals.first returns the first candidate that is present -- confirm.
Optional<String> altText = Optionals.first(
698699
properties.getAttributeOrNone("descr").filter(description -> !description.trim().isEmpty()),
699700
properties.getAttributeOrNone("title")
700701
);
702+
703+
// An a:hlinkClick child of wp:docPr carries the relationship ID of a link target;
// href stays empty when the child or its r:id attribute is missing.
XmlElementLike hlinkClickElement = properties.findChildOrEmpty("a:hlinkClick");
704+
Optional<String> href = hlinkClickElement.getAttributeOrNone("r:id")
705+
.map(relationships::findTargetByRelationshipId);
706+
701707
XmlElementList blips = element.findChildren("a:graphic")
702708
.findChildren("a:graphicData")
703709
.findChildren("pic:pic")
704710
.findChildren("pic:blipFill")
705711
.findChildren("a:blip");
706-
return readBlips(blips, altText);
712+
return readBlips(blips, altText, href);
707713
}
708714

709-
private ReadResult readBlips(XmlElementList blips, Optional<String> altText) {
710-
return ReadResult.flatMap(blips, blip -> readBlip(blip, altText));
715+
/**
 * Reads each a:blip element in the list with readBlip and combines the
 * per-blip results through ReadResult.flatMap.
 *
 * @param blips the a:blip elements to read
 * @param altText alternative text passed unchanged to every readBlip call
 * @param href hyperlink target passed unchanged to every readBlip call
 * @return the combined result for all blips
 */
private ReadResult readBlips(
716+
XmlElementList blips,
717+
Optional<String> altText,
718+
Optional<String> href
719+
) {
720+
return ReadResult.flatMap(blips, blip -> readBlip(blip, altText, href));
711721
}
712722

713-
private ReadResult readBlip(XmlElement blip, Optional<String> altText) {
714-
Optional<String> embedRelationshipId = blip.getAttributeOrNone("r:embed");
715-
Optional<String> linkRelationshipId = blip.getAttributeOrNone("r:link");
716-
if (embedRelationshipId.isPresent()) {
717-
String imagePath = relationshipIdToDocxPath(embedRelationshipId.get());
718-
return readImage(imagePath, altText, () -> Archives.getInputStream(file, imagePath));
719-
} else if (linkRelationshipId.isPresent()) {
720-
String imagePath = relationships.findTargetByRelationshipId(linkRelationshipId.get());
721-
return readImage(imagePath, altText, () -> fileReader.getInputStream(imagePath));
722-
} else {
723+
/**
 * Reads a single a:blip element into an image, optionally wrapped in a hyperlink.
 *
 * @param blip the a:blip element
 * @param altText alternative text forwarded to readImage
 * @param href when present, the image elements are wrapped in one Hyperlink
 *     with this target
 * @return an empty result carrying a warning when findBlipImage finds no image;
 *     otherwise the readImage result, mapped into a Hyperlink if href is present
 */
private ReadResult readBlip(
724+
XmlElement blip,
725+
Optional<String> altText,
726+
Optional<String> href
727+
) {
728+
Optional<BlipImage> blipImage = findBlipImage(blip);
729+
730+
if (!blipImage.isPresent()) {
723731
return ReadResult.emptyWithWarning("Could not find image file for a:blip element");
724732
}
733+
734+
ReadResult result = readImage(blipImage.get().path, altText, blipImage.get().open);
735+
736+
// No link target: return the image result as it is.
if (!href.isPresent()) {
737+
return result;
738+
}
739+
740+
// NOTE(review): the two empty Optionals are presumably the anchor and the
// target frame -- confirm against the Hyperlink constructor.
return result.map(imageElements -> list(
741+
new Hyperlink(href, Optional.empty(), Optional.empty(), imageElements)
742+
));
725743
}
726744

727745
private ReadResult readImage(String imagePath, Optional<String> altText, InputStreamSupplier open) {
@@ -736,6 +754,36 @@ private ReadResult readImage(String imagePath, Optional<String> altText, InputSt
736754
}
737755
}
738756

757+
/**
 * Pairs the path of an image with the supplier that opens a stream on it.
 * Instances are created by findBlipImage and read by readBlip.
 */
private static class BlipImage {
758+
// Image path as resolved from the blip's relationship ID.
private final String path;
759+
// Opens the image's input stream when invoked.
private final InputStreamSupplier open;
760+
761+
public BlipImage(String path, InputStreamSupplier open) {
762+
this.path = path;
763+
this.open = open;
764+
}
765+
}
766+
767+
/**
 * Works out where the image behind an a:blip element is and how to open it.
 *
 * The r:embed attribute is checked first, then r:link.
 *
 * @param blip the a:blip element
 * @return the image path and its stream supplier, or empty when the blip has
 *     neither an r:embed nor an r:link attribute
 */
private Optional<BlipImage> findBlipImage(XmlElement blip) {
768+
Optional<String> embedRelationshipId = blip.getAttributeOrNone("r:embed");
769+
Optional<String> linkRelationshipId = blip.getAttributeOrNone("r:link");
770+
if (embedRelationshipId.isPresent()) {
771+
// r:embed: the stream is opened from the docx archive (file).
String imagePath = relationshipIdToDocxPath(embedRelationshipId.get());
772+
return Optional.of(new BlipImage(
773+
imagePath,
774+
() -> Archives.getInputStream(file, imagePath)
775+
));
776+
} else if (linkRelationshipId.isPresent()) {
777+
// r:link: the stream is opened through fileReader using the relationship target.
String imagePath = relationships.findTargetByRelationshipId(linkRelationshipId.get());
778+
return Optional.of(new BlipImage(
779+
imagePath,
780+
() -> fileReader.getInputStream(imagePath)
781+
));
782+
} else {
783+
return Optional.empty();
784+
}
785+
}
786+
739787
private ReadResult readSdt(XmlElement element) {
740788
ReadResult contentResult = readElements(
741789
element.findChildOrEmpty("w:sdtContent").getChildren()

src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,42 @@ public void noElementsCreatedIfImageCannotBeFoundInWpInline() {
19441944
assertThat(result, isInternalSuccess(empty()));
19451945
}
19461946

1947+
// Checks that a wp:inline picture whose wp:docPr contains an a:hlinkClick is read
// as a Hyperlink wrapping the Image, and that the image content is still readable.
@Test
1948+
public void canReadPicturesWithHyperlinkSpecifiedInDocumentProperties() throws IOException {
1949+
XmlElement element = element("w:drawing", list(
1950+
element("wp:inline", list(
1951+
element("wp:docPr", map(), list(
1952+
element("a:hlinkClick", map("r:id", "rId42"))
1953+
)),
1954+
graphicXml(embeddedBlipXml(IMAGE_RELATIONSHIP_ID))
1955+
))
1956+
));
1957+
// Two relationships are needed: one for the embedded image, one for the link target.
Relationships relationships = new Relationships(list(
1958+
imageRelationship(IMAGE_RELATIONSHIP_ID, "media/hat.png"),
1959+
hyperlinkRelationship("rId42", "http://example.com")
1960+
));
1961+
Archive file = InMemoryArchive.fromStrings(map("word/media/hat.png", IMAGE_BYTES));
1962+
1963+
DocumentElement result = readSuccess(
1964+
bodyReader(relationships, file),
1965+
element
1966+
);
1967+
1968+
assertThat(result, isHyperlink(
1969+
hasHref("http://example.com"),
1970+
hasChildren(
1971+
isImage(
1972+
hasProperty("contentType", deepEquals(Optional.of("image/png")))
1973+
)
1974+
)
1975+
));
1976+
// The image nested in the hyperlink must still open to the archived bytes.
Image image = (Image) ((Hyperlink) result).getChildren().get(0);
1977+
assertThat(
1978+
toString(image.open()),
1979+
equalTo(IMAGE_BYTES)
1980+
);
1981+
}
1982+
19471983
// Convenience overload: delegates to the three-argument inlineImageXml with the
// description wrapped in an Optional and an empty Optional as the third argument.
private XmlElement inlineImageXml(XmlElement blip, String description) {
19481984
return inlineImageXml(blip, Optional.of(description), Optional.empty());
19491985
}
@@ -1965,10 +2001,18 @@ private List<XmlNode> imageXml(XmlElement blip, Optional<String> description, Op
19652001

19662002
return list(
19672003
element("wp:docPr", properties),
1968-
element("a:graphic", list(
1969-
element("a:graphicData", list(
1970-
element("pic:pic", list(
1971-
element("pic:blipFill", list(blip)))))))));
2004+
graphicXml(blip)
2005+
);
2006+
}
2007+
2008+
// Wraps the given blip in the nesting
// a:graphic > a:graphicData > pic:pic > pic:blipFill
// so tests can build drawing XML without repeating the structure.
private XmlNode graphicXml(XmlElement blip) {
2009+
return element("a:graphic", list(
2010+
element("a:graphicData", list(
2011+
element("pic:pic", list(
2012+
element("pic:blipFill", list(blip))
2013+
))
2014+
))
2015+
));
19722016
}
19732017

19742018
private XmlElement embeddedBlipXml(String relationshipId) {

src/test/java/org/zwobble/mammoth/tests/docx/DocumentMatchers.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,4 +122,9 @@ static Matcher<Hyperlink> hasTargetFrame(String targetFrame) {
122122
// Matches a Hyperlink whose "targetFrame" property equals Optional.empty().
static Matcher<Hyperlink> hasNoTargetFrame() {
123123
return hasProperty("targetFrame", equalTo(Optional.empty()));
124124
}
125+
126+
// Matches a DocumentElement by casting it to Image and requiring every supplied
// matcher to accept it (the matchers are combined with allOf).
@SafeVarargs
127+
static Matcher<DocumentElement> isImage(Matcher<? super Image>... matchers) {
128+
return cast(Image.class, allOf(matchers));
129+
}
125130
}

0 commit comments

Comments
 (0)