Skip to content

Commit 7c31107

Browse files
committed
refactor reopening of dangling inline elements, make it lazy to be able to suppress the reopen in case of empty elements
1 parent 9658053 commit 7c31107

3 files changed

Lines changed: 54 additions & 13 deletions

File tree

src/main/java/org/htmlunit/cyberneko/HTMLTagBalancer.java

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,8 @@ public void startElement(final QName elem, XMLAttributes attrs, final Augmentati
613613
final HTMLElements.Element element = getElement(elem);
614614
final short elementCode = element.code;
615615

616+
reopenInlineElements(null);
617+
616618
if (elementCode == HTMLElements.TEMPLATE) {
617619
fTemplateFragment = true;
618620
}
@@ -980,6 +982,7 @@ public void startCDATA(final Augmentations augs) throws XNIException {
980982
fSeenAnything = true;
981983

982984
consumeEarlyTextIfNeeded();
985+
reopenInlineElements(null);
983986

984987
// check for end of document
985988
if (fSeenRootElementEnd) {
@@ -1023,6 +1026,8 @@ public void characters(final XMLString text, final Augmentations augs) throws XN
10231026
return;
10241027
}
10251028

1029+
reopenInlineElements(null);
1030+
10261031
if (!fDocumentFragment) {
10271032
// handle bare characters
10281033
if (!fSeenRootElement) {
@@ -1091,6 +1096,10 @@ public void endElement(final QName element, final Augmentations augs) throws XNI
10911096
final HTMLElements.Element elem = getElement(element);
10921097
final short elementCode = elem.code;
10931098

1099+
if (reopenInlineElements(elem)) {
1100+
return;
1101+
}
1102+
10941103
if (!fTemplateFragment && fOpenedSelect) {
10951104
if (elementCode == HTMLElements.SELECT) {
10961105
fOpenedSelect = false;
@@ -1199,21 +1208,42 @@ else if (!elem.isEmpty()) {
11991208
callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
12001209
}
12011210
}
1211+
}
12021212

1203-
// re-open inline elements
1204-
if (depth > 1) {
1205-
final int size = fInlineStack.top;
1206-
for (int i = 0; i < size; i++) {
1207-
final Info info = fInlineStack.pop();
1208-
final XMLAttributes attributes = info.attributes;
1209-
if (fReportErrors) {
1210-
final String iname = info.qname.getRawname();
1211-
fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
1212-
}
1213-
forceStartElement(info.qname, attributes, synthesizedAugs());
1214-
}
1213+
// re-open inline elements
1214+
protected boolean reopenInlineElements(final HTMLElements.Element element) {
1215+
final int size = fInlineStack.top;
1216+
1217+
if (size == 0) {
1218+
return false;
12151219
}
12161220

1221+
int i = 0;
1222+
Info info = fInlineStack.pop();
1223+
XMLAttributes attributes = info.attributes;
1224+
if (fReportErrors) {
1225+
final String iname = info.qname.getRawname();
1226+
fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
1227+
}
1228+
1229+
if (element != null && info.element.code == element.code) {
1230+
return true;
1231+
}
1232+
1233+
forceStartElement(info.qname, attributes, synthesizedAugs());
1234+
i++;
1235+
1236+
for ( ; i < size; i++) {
1237+
info = fInlineStack.pop();
1238+
attributes = info.attributes;
1239+
if (fReportErrors) {
1240+
final String iname = info.qname.getRawname();
1241+
fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
1242+
}
1243+
1244+
forceStartElement(info.qname, attributes, synthesizedAugs());
1245+
}
1246+
return false;
12171247
}
12181248

12191249
// Returns an HTML element.
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<html>
22
<body>
33
<b><i>Bold and italic</b></i>
4-
<strong><em>Strong and emphasis</strong></em>
4+
<strong><em>Strong and emphasis</strong> </em>
5+
<strong><em>text between</strong>some</em>
56
</body>
67
</html>

src/test/resources/org/htmlunit/cyberneko/testfiles/malformed/test-overlapping-tags.html.canonical

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@
1515
)em
1616
)strong
1717
(em
18+
"
19+
)em
20+
"\n
21+
(strong
22+
(em
23+
"text between
24+
)em
25+
)strong
26+
(em
27+
"some
1828
)em
1929
"\n\n
2030
)body

0 commit comments

Comments
 (0)