Skip to content

Commit 98c9854

Browse files
committed
Handle numbering levels defined without an index
1 parent 444dc1b commit 98c9854

6 files changed

Lines changed: 112 additions & 8 deletions

File tree

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
* Support disabling external file accesses using the external_file_access argument.
66

7+
* Handle numbering levels defined without an index.
8+
79
# 1.10.0
810

911
* Add "Heading" and "Body" styles, as found in documents created by Apple Pages,

src/main/java/org/zwobble/mammoth/internal/docx/Numbering.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ public AbstractNumLevel(String levelIndex, boolean isOrdered, Optional<String> p
4141
this.paragraphStyleId = paragraphStyleId;
4242
}
4343

44+
public String levelIndex() {
45+
return levelIndex;
46+
}
47+
4448
public NumberingLevel toNumberingLevel() {
4549
return new NumberingLevel(levelIndex, isOrdered);
4650
}

src/main/java/org/zwobble/mammoth/internal/docx/NumberingXml.java

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import org.zwobble.mammoth.internal.xml.XmlElement;
44
import org.zwobble.mammoth.internal.xml.XmlElementList;
55

6+
import java.util.HashMap;
67
import java.util.Map;
78
import java.util.Optional;
89

@@ -31,15 +32,42 @@ private static Map.Entry<String, Numbering.AbstractNum> readAbstractNum(XmlEleme
3132
}
3233

3334
private static Map<String, Numbering.AbstractNumLevel> readAbstractNumLevels(XmlElement element) {
34-
return toMap(element.findChildren("w:lvl"), NumberingXml::readAbstractNumLevel);
35+
Map<String, Numbering.AbstractNumLevel> levels = new HashMap<>();
36+
37+
// Some malformed documents define numbering levels without an index, and
38+
// reference the numbering using a w:numPr element without a w:ilvl child.
39+
// To handle such cases, we treat a level without an index as level 0, unless a level with an explicit index of 0 is also defined.
40+
Optional<Numbering.AbstractNumLevel> levelWithoutIndex = Optional.empty();
41+
42+
for (XmlElement levelElement : element.findChildren("w:lvl")) {
43+
Map.Entry<Optional<String>, Numbering.AbstractNumLevel> entry =
44+
readAbstractNumLevel(levelElement);
45+
46+
if (entry.getKey().isPresent()) {
47+
levels.put(entry.getKey().get(), entry.getValue());
48+
} else {
49+
levelWithoutIndex = Optional.of(entry.getValue());
50+
}
51+
}
52+
53+
if (levelWithoutIndex.isPresent() && !levels.containsKey(levelWithoutIndex.get().levelIndex())) {
54+
levels.put(levelWithoutIndex.get().levelIndex(), levelWithoutIndex.get());
55+
}
56+
57+
return levels;
3558
}
3659

37-
private static Map.Entry<String, Numbering.AbstractNumLevel> readAbstractNumLevel(XmlElement element) {
38-
String levelIndex = element.getAttribute("w:ilvl");
60+
private static Map.Entry<Optional<String>, Numbering.AbstractNumLevel> readAbstractNumLevel(XmlElement element) {
61+
Optional<String> levelIndex = element.getAttributeOrNone("w:ilvl");
3962
Optional<String> numFmt = element.findChildOrEmpty("w:numFmt").getAttributeOrNone("w:val");
4063
boolean isOrdered = !numFmt.equals(Optional.of("bullet"));
4164
Optional<String> paragraphStyleId = element.findChildOrEmpty("w:pStyle").getAttributeOrNone("w:val");
42-
return entry(levelIndex, new Numbering.AbstractNumLevel(levelIndex, isOrdered, paragraphStyleId));
65+
Numbering.AbstractNumLevel abstractNumLevel = new Numbering.AbstractNumLevel(
66+
levelIndex.orElse("0"),
67+
isOrdered,
68+
paragraphStyleId
69+
);
70+
return entry(levelIndex, abstractNumLevel);
4371
}
4472

4573
private static Map<String, Numbering.Num> readNums(XmlElementList numElements) {

src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,13 @@ private Optional<NumberingLevel> readNumbering(Optional<Style> style, XmlElement
429429
}
430430
}
431431

432+
// Some malformed documents define numbering levels without an index, and
433+
// reference the numbering using a w:numPr element without a w:ilvl child.
434+
// To handle such cases, we assume a level of 0 as a fallback.
435+
if (numId.isPresent()) {
436+
return numbering.findLevel(numId.get(), "0");
437+
}
438+
432439
return Optional.empty();
433440
}
434441

src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,18 +259,21 @@ public void numberingPropertiesInParagraphPropertiesTakesPrecedenceOverNumbering
259259
}
260260

261261
@Test
262-
public void numberingPropertiesAreIgnoredIfLevelIsMissing() {
262+
public void whenNumberingPropertiesAreMissingLevelThenLevelOf0IsAssumed() {
263263
// TODO: emit a warning when w:numPr has no w:ilvl child
264264
XmlElement element = paragraphXml(list(
265265
element("w:pPr", list(
266266
element("w:numPr", map(), list(
267-
element("w:numId", map("w:val", "42"))))))));
267+
element("w:numId", map("w:val", "42"))
268+
))
269+
))
270+
));
268271

269-
Numbering numbering = numberingMap(map("42", map("1", Numbering.AbstractNumLevel.ordered("1"))));
272+
Numbering numbering = numberingMap(map("42", map("0", Numbering.AbstractNumLevel.ordered("0"))));
270273

271274
assertThat(
272275
readSuccess(bodyReader(numbering), element),
273-
hasNumbering(Optional.empty()));
276+
hasNumbering(NumberingLevel.ordered("0")));
274277
}
275278

276279
@Test

src/test/java/org/zwobble/mammoth/tests/docx/NumberingXmlTests.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,66 @@ public void numReferencingNonExistentAbstractNumIsIgnored() {
7878
assertEquals(Optional.empty(), numbering.findLevel("47", "0"));
7979
}
8080

81+
@Test
82+
public void givenNoOtherLevelsWithIndexOf0WhenLevelIsMissingIlvlThenLevelIndexIs0() {
83+
XmlElement element = element("w:numbering", list(
84+
element("w:abstractNum", map("w:abstractNumId", "42"), list(
85+
element("w:lvl", map(), list(
86+
element("w:numFmt", map("w:val", "decimal"))
87+
))
88+
)),
89+
element("w:num", map("w:numId", "47"), list(
90+
element("w:abstractNumId", map("w:val", "42"))
91+
))
92+
));
93+
94+
Numbering numbering = readNumbering(element);
95+
96+
assertEquals(true, numbering.findLevel("47", "0").get().isOrdered());
97+
}
98+
99+
@Test
100+
public void givenPreviousOtherLevelWithIndexOf0WhenLevelIsMissingIlvlThenLevelIsIgnored() {
101+
XmlElement element = element("w:numbering", list(
102+
element("w:abstractNum", map("w:abstractNumId", "42"), list(
103+
element("w:lvl", map("w:ilvl", "0"), list(
104+
element("w:numFmt", map("w:val", "bullet"))
105+
)),
106+
element("w:lvl", map(), list(
107+
element("w:numFmt", map("w:val", "decimal"))
108+
))
109+
)),
110+
element("w:num", map("w:numId", "47"), list(
111+
element("w:abstractNumId", map("w:val", "42"))
112+
))
113+
));
114+
115+
Numbering numbering = readNumbering(element);
116+
117+
assertEquals(false, numbering.findLevel("47", "0").get().isOrdered());
118+
}
119+
120+
@Test
121+
public void givenSubsequentOtherLevelWithIndexOf0WhenLevelIsMissingIlvlThenLevelIsIgnored() {
122+
XmlElement element = element("w:numbering", list(
123+
element("w:abstractNum", map("w:abstractNumId", "42"), list(
124+
element("w:lvl", map(), list(
125+
element("w:numFmt", map("w:val", "decimal"))
126+
)),
127+
element("w:lvl", map("w:ilvl", "0"), list(
128+
element("w:numFmt", map("w:val", "bullet"))
129+
))
130+
)),
131+
element("w:num", map("w:numId", "47"), list(
132+
element("w:abstractNumId", map("w:val", "42"))
133+
))
134+
));
135+
136+
Numbering numbering = readNumbering(element);
137+
138+
assertEquals(false, numbering.findLevel("47", "0").get().isOrdered());
139+
}
140+
81141
@Test
82142
public void whenAbstractNumHasNumStyleLinkThenStyleIsUsedToFindNum() {
83143
Numbering numbering = readNumberingXmlElement(

0 commit comments

Comments
 (0)