Skip to content

Commit a5a2393

Browse files
committed
introduce formatting flag to bring reopening for those elements in sync with the spec
1 parent 102fc0f commit a5a2393

2 files changed

Lines changed: 47 additions & 34 deletions

File tree

src/main/java/org/htmlunit/cyberneko/HTMLElements.java

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ public HTMLElements() {
214214
// initialize array of element information
215215
elementsArray['A' - 'A'] = new Element[] {
216216
// A - - (%inline;)* -(A)
217-
new Element(A, "A", Element.CONTAINER, BODY, new short[] {A}),
217+
new Element(A, "A", Element.CONTAINER | Element.FORMATTING, BODY, new short[] {A}),
218218
// ABBR - - (%inline;)*
219219
new Element(ABBR, "ABBR", Element.INLINE, BODY, null),
220220
// ACRONYM - - (%inline;)*
@@ -234,7 +234,7 @@ public HTMLElements() {
234234
};
235235
elementsArray['B' - 'A'] = new Element[] {
236236
// B - - (%inline;)*
237-
new Element(B, "B", Element.INLINE, BODY, new short[] {SVG}),
237+
new Element(B, "B", Element.INLINE | Element.FORMATTING, BODY, new short[] {SVG}),
238238
// BASE - O EMPTY
239239
new Element(BASE, "BASE", Element.EMPTY, HEAD, null),
240240
// BASEFONT
@@ -246,7 +246,7 @@ public HTMLElements() {
246246
// BGSOUND
247247
new Element(BGSOUND, "BGSOUND", Element.EMPTY, HEAD, null),
248248
// BIG - - (%inline;)*
249-
new Element(BIG, "BIG", Element.INLINE, BODY, new short[]{SVG}),
249+
new Element(BIG, "BIG", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
250250
// BLINK
251251
new Element(BLINK, "BLINK", Element.INLINE, BODY, null),
252252
// BLOCKQUOTE - - (%block;|SCRIPT)+
@@ -267,7 +267,7 @@ public HTMLElements() {
267267
// CITE - - (%inline;)*
268268
new Element(CITE, "CITE", Element.INLINE, BODY, null),
269269
// CODE - - (%inline;)*
270-
new Element(CODE, "CODE", Element.INLINE, BODY, new short[]{SVG}),
270+
new Element(CODE, "CODE", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
271271
// COL - O EMPTY
272272
new Element(COL, "COL", Element.EMPTY, COLGROUP, null),
273273
// COLGROUP - O (COL)*
@@ -301,7 +301,7 @@ public HTMLElements() {
301301
};
302302
elementsArray['E' - 'A'] = new Element[] {
303303
// EM - - (%inline;)*
304-
new Element(EM, "EM", Element.INLINE, BODY, new short[]{SVG}),
304+
new Element(EM, "EM", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
305305
// EMBED
306306
new Element(EMBED, "EMBED", Element.EMPTY, BODY, new short[]{SVG}),
307307
};
@@ -313,7 +313,7 @@ public HTMLElements() {
313313

314314
new Element(FIGURE, "FIGURE", Element.BLOCK, BODY, new short[] {P}),
315315
// FONT
316-
new Element(FONT, "FONT", Element.CONTAINER, BODY, null),
316+
new Element(FONT, "FONT", Element.CONTAINER | Element.FORMATTING, BODY, null),
317317

318318
new Element(FOOTER, "FOOTER", Element.BLOCK, BODY, new short[] {P}),
319319

@@ -344,7 +344,7 @@ public HTMLElements() {
344344
};
345345
elementsArray['I' - 'A'] = new Element[] {
346346
// I - - (%inline;)*
347-
new Element(I, "I", Element.INLINE, BODY, new short[]{SVG}),
347+
new Element(I, "I", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
348348
// IFRAME
349349
new Element(IFRAME, "IFRAME", Element.BLOCK, BODY, null),
350350
// ILAYER
@@ -401,7 +401,7 @@ public HTMLElements() {
401401
// NEXTID
402402
new Element(NEXTID, "NEXTID", Element.INLINE, BODY, null),
403403
// NOBR
404-
new Element(NOBR, "NOBR", Element.INLINE, BODY, new short[]{NOBR, SVG}),
404+
new Element(NOBR, "NOBR", Element.INLINE | Element.FORMATTING, BODY, new short[]{NOBR, SVG}),
405405
// NOEMBED
406406
new Element(NOEMBED, "NOEMBED", Element.CONTAINER, BODY, null),
407407
// NOFRAMES - - (BODY) -(NOFRAMES)
@@ -457,7 +457,7 @@ public HTMLElements() {
457457
};
458458
elementsArray['S' - 'A'] = new Element[] {
459459
// S
460-
new Element(S, "S", Element.INLINE, BODY, new short[]{SVG}),
460+
new Element(S, "S", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
461461
// SAMP - - (%inline;)*
462462
new Element(SAMP, "SAMP", Element.INLINE, BODY, null),
463463
// SCRIPT - - %Script;
@@ -470,7 +470,7 @@ public HTMLElements() {
470470

471471
new Element(SLOT, "SLOT", Element.CONTAINER, BODY, null),
472472
// SMALL - - (%inline;)*
473-
new Element(SMALL, "SMALL", Element.INLINE, BODY, new short[]{SVG}),
473+
new Element(SMALL, "SMALL", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
474474
// SOUND
475475
new Element(SOUND, "SOUND", Element.EMPTY, HEAD, null),
476476

@@ -480,9 +480,9 @@ public HTMLElements() {
480480
// SPAN - - (%inline;)*
481481
new Element(SPAN, "SPAN", Element.CONTAINER, BODY, new short[]{SVG}),
482482
// STRIKE
483-
new Element(STRIKE, "STRIKE", Element.INLINE, BODY, new short[]{SVG}),
483+
new Element(STRIKE, "STRIKE", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
484484
// STRONG - - (%inline;)*
485-
new Element(STRONG, "STRONG", Element.INLINE, BODY, new short[]{SVG}),
485+
new Element(STRONG, "STRONG", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
486486
// STYLE - - %StyleSheet;
487487
new Element(STYLE, "STYLE", Element.SPECIAL, new short[]{HEAD, BODY}, new short[]{STYLE, TITLE, META}),
488488
// SUB - - (%inline;)*
@@ -523,11 +523,11 @@ public HTMLElements() {
523523

524524
new Element(TRACK, "TRACK", Element.EMPTY, BODY, null),
525525
// TT - - (%inline;)*
526-
new Element(TT, "TT", Element.INLINE, BODY, new short[]{SVG}),
526+
new Element(TT, "TT", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
527527
};
528528
elementsArray['U' - 'A'] = new Element[] {
529529
// U,
530-
new Element(U, "U", Element.INLINE, BODY, new short[]{SVG}),
530+
new Element(U, "U", Element.INLINE | Element.FORMATTING, BODY, new short[]{SVG}),
531531
// UL - - (LI)+
532532
new Element(UL, "UL", Element.CONTAINER, BODY, new short[] {P, SVG}),
533533
};
@@ -771,6 +771,12 @@ public static class Element {
771771
*/
772772
public static final int SCRIPT_SUPPORTING = 0x20;
773773

774+
/**
775+
* Formatting elements.
776+
* <a href='https://html.spec.whatwg.org/#formatting'>active formatting elements</a>
777+
*/
778+
public static final int FORMATTING = 0x40;
779+
774780
/** The element code. */
775781
public final short code;
776782

@@ -903,6 +909,13 @@ public final boolean isScriptSupporting() {
903909
return (flags & SCRIPT_SUPPORTING) != 0;
904910
}
905911

912+
/**
913+
* @return true if this element is a formatting element.
914+
*/
915+
public final boolean isFormatting() {
916+
return (flags & FORMATTING) != 0;
917+
}
918+
906919
/**
907920
* @return true if this element can close the specified Element.
908921
*

src/main/java/org/htmlunit/cyberneko/HTMLTagBalancer.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ public class HTMLTagBalancer
199199
protected final InfoStack fElementStack = new InfoStack();
200200

201201
/** The inline stack. */
202-
protected final InfoStack fInlineStack = new InfoStack();
202+
protected final InfoStack fFormattingStack = new InfoStack();
203203

204204
/** True if seen anything. Important for xml declaration. */
205205
protected boolean fSeenAnything;
@@ -613,7 +613,7 @@ public void startElement(final QName elem, XMLAttributes attrs, final Augmentati
613613
final HTMLElements.Element element = getElement(elem);
614614
final short elementCode = element.code;
615615

616-
reopenInlineElements(null);
616+
reopenFormattingElements(null);
617617

618618
if (elementCode == HTMLElements.TEMPLATE) {
619619
fTemplateFragment = true;
@@ -830,20 +830,20 @@ else if (!fTemplateFragment && elementCode == HTMLElements.SELECT) {
830830
fOpenedSelect = true;
831831
}
832832

833-
// if block element, save immediate parent inline elements
833+
// if block element, save immediate parent formatting elements
834834
int depth = 0;
835835
if (element.flags == 0) {
836836
final int length = fElementStack.top;
837-
fInlineStack.top = 0;
837+
fFormattingStack.top = 0;
838838
for (int i = length - 1; i >= 0; i--) {
839839
final Info info = fElementStack.data[i];
840-
if (!info.element.isInline()) {
840+
if (!info.element.isFormatting()) {
841841
break;
842842
}
843-
fInlineStack.push(info);
843+
fFormattingStack.push(info);
844844
endElement(info.qname, synthesizedAugs());
845845
}
846-
depth = fInlineStack.top;
846+
depth = fFormattingStack.top;
847847
}
848848

849849
// close previous elements
@@ -920,9 +920,9 @@ else if (!fTemplateFragment && elementCode == HTMLElements.SELECT) {
920920
}
921921
}
922922

923-
// re-open inline elements
923+
// re-open formatting elements
924924
for (int i = 0; i < depth; i++) {
925-
final Info info = fInlineStack.pop();
925+
final Info info = fFormattingStack.pop();
926926
forceStartElement(info.qname, info.attributes, synthesizedAugs());
927927
}
928928

@@ -982,7 +982,7 @@ public void startCDATA(final Augmentations augs) throws XNIException {
982982
fSeenAnything = true;
983983

984984
consumeEarlyTextIfNeeded();
985-
reopenInlineElements(null);
985+
reopenFormattingElements(null);
986986

987987
// check for end of document
988988
if (fSeenRootElementEnd) {
@@ -1026,7 +1026,7 @@ public void characters(final XMLString text, final Augmentations augs) throws XN
10261026
return;
10271027
}
10281028

1029-
reopenInlineElements(null);
1029+
reopenFormattingElements(null);
10301030

10311031
if (!fDocumentFragment) {
10321032
// handle bare characters
@@ -1096,7 +1096,7 @@ public void endElement(final QName element, final Augmentations augs) throws XNI
10961096
final HTMLElements.Element elem = getElement(element);
10971097
final short elementCode = elem.code;
10981098

1099-
if (reopenInlineElements(elem)) {
1099+
if (reopenFormattingElements(elem)) {
11001100
return;
11011101
}
11021102

@@ -1178,18 +1178,18 @@ else if (!elem.isEmpty()) {
11781178
return;
11791179
}
11801180

1181-
// find unbalanced inline elements
1181+
// find unbalanced formatting elements
11821182
if (depth > 1 && elem.isInline()) {
11831183
final int size = fElementStack.top;
1184-
fInlineStack.top = 0;
1184+
fFormattingStack.top = 0;
11851185
for (int i = 0; i < depth - 1; i++) {
11861186
final Info info = fElementStack.data[size - i - 1];
11871187
final HTMLElements.Element pelem = info.element;
1188-
if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO: investigate if only FONT
1188+
if (pelem.isFormatting()) {
11891189
// NOTE: I don't have to make a copy of the info because
11901190
// it will just be popped off of the element stack
11911191
// as soon as we close it, anyway.
1192-
fInlineStack.push(info);
1192+
fFormattingStack.push(info);
11931193
}
11941194
}
11951195
}
@@ -1211,15 +1211,15 @@ else if (!elem.isEmpty()) {
12111211
}
12121212

12131213
// re-open inline elements
1214-
protected boolean reopenInlineElements(final HTMLElements.Element element) {
1215-
final int size = fInlineStack.top;
1214+
protected boolean reopenFormattingElements(final HTMLElements.Element element) {
1215+
final int size = fFormattingStack.top;
12161216

12171217
if (size == 0) {
12181218
return false;
12191219
}
12201220

12211221
int i = 0;
1222-
Info info = fInlineStack.pop();
1222+
Info info = fFormattingStack.pop();
12231223
XMLAttributes attributes = info.attributes;
12241224
if (fReportErrors) {
12251225
final String iname = info.qname.getRawname();
@@ -1234,7 +1234,7 @@ protected boolean reopenInlineElements(final HTMLElements.Element element) {
12341234
i++;
12351235

12361236
for ( ; i < size; i++) {
1237-
info = fInlineStack.pop();
1237+
info = fFormattingStack.pop();
12381238
attributes = info.attributes;
12391239
if (fReportErrors) {
12401240
final String iname = info.qname.getRawname();

0 commit comments

Comments
 (0)