Skip to content

Commit a526c40

Browse files
committed
stuff
1 parent e80967c commit a526c40

6 files changed

Lines changed: 401 additions & 71 deletions

File tree

src/main/java/ch/digitalfondue/jfiveparse/Common.java

Lines changed: 151 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.AbstractList;
1919
import java.util.ArrayList;
2020
import java.util.HashMap;
21+
import java.util.List;
2122
import java.util.function.IntFunction;
2223

2324
/**
@@ -154,12 +155,22 @@ static void adjustSVGAttributes(Attributes attrs) {
154155
return;
155156
}
156157

157-
for (String lowerCaseAttr : new ArrayList<>(attrs.keySet())) {
158-
if (SVG_ATTRIBUTES.containsKey(lowerCaseAttr)) {
158+
List<String> toAdjust = null;
159+
for (AttributeNode attr : attrs) {
160+
if (SVG_ATTRIBUTES.containsKey(attr.name)) {
161+
if (toAdjust == null) {
162+
toAdjust = new ArrayList<>(2);
163+
}
164+
toAdjust.add(attr.name);
165+
}
166+
}
167+
168+
if (toAdjust != null) {
169+
for (String lowerCaseAttr : toAdjust) {
159170
AttributeNode attr = attrs.get(lowerCaseAttr);
171+
attrs.remove(lowerCaseAttr);
160172
attr.name = SVG_ATTRIBUTES.get(lowerCaseAttr);
161173
attrs.put(attr);
162-
attrs.remove(lowerCaseAttr);
163174
}
164175
}
165176
}
@@ -185,15 +196,25 @@ static void adjustForeignAttributes(Attributes attrs) {
185196
return;
186197
}
187198

188-
for (String lowerCaseAttr: new ArrayList<>(attrs.keySet())) {
189-
if (FOREIGN_ATTRIBUTES_TO_ADJUST.containsKey(lowerCaseAttr)) {
199+
List<String> toAdjust = null;
200+
for (AttributeNode attr : attrs) {
201+
if (FOREIGN_ATTRIBUTES_TO_ADJUST.containsKey(attr.name)) {
202+
if (toAdjust == null) {
203+
toAdjust = new ArrayList<>(2);
204+
}
205+
toAdjust.add(attr.name);
206+
}
207+
}
208+
209+
if (toAdjust != null) {
210+
for (String lowerCaseAttr : toAdjust) {
190211
String[] adj = FOREIGN_ATTRIBUTES_TO_ADJUST.get(lowerCaseAttr);
191212
AttributeNode attr = attrs.get(lowerCaseAttr);
213+
attrs.remove(lowerCaseAttr);
192214
attr.prefix = adj[0];
193215
attr.name = adj[1];
194216
attr.namespace = adj[2];
195217
attrs.put(attr);
196-
attrs.remove(lowerCaseAttr);
197218
}
198219
}
199220
}
@@ -389,6 +410,7 @@ static int tagNameToID(String tagName) {
389410
case "math" -> ELEMENT_MATH_ID;
390411
case "svg" -> ELEMENT_SVG_ID;
391412
case "ruby" -> ELEMENT_RUBY_ID;
413+
//
392414
case "span" -> ELEMENT_SPAN_ID;
393415
case "sub" -> ELEMENT_SUB_ID;
394416
case "sup" -> ELEMENT_SUP_ID;
@@ -397,7 +419,6 @@ static int tagNameToID(String tagName) {
397419
};
398420
}
399421

400-
// this order is the SPECIAL_ELEMENTS_HTML container from 1 to 81
401422
static final int ELEMENT_ADDRESS_ID = 1;
402423
static final int ELEMENT_APPLET_ID = 2;
403424
static final int ELEMENT_AREA_ID = 3;
@@ -479,16 +500,13 @@ static int tagNameToID(String tagName) {
479500
static final int ELEMENT_UL_ID = 79;
480501
static final int ELEMENT_WBR_ID = 80;
481502
static final int ELEMENT_XMP_ID = 81;
482-
// end this order is the SPECIAL_ELEMENTS_HTML container from 1 to 81
483503
static final int ELEMENT_OPTGROUP_ID = 82;
484504
static final int ELEMENT_OPTION_ID = 83;
485505
static final int ELEMENT_RB_ID = 84;
486506
static final int ELEMENT_RP_ID = 85;
487507
static final int ELEMENT_RT_ID = 86;
488508
static final int ELEMENT_RTC_ID = 87;
489-
//
490509
static final int ELEMENT_A_ID = 88;
491-
//
492510
static final int ELEMENT_DIALOG_ID = 89;
493511
static final int ELEMENT_SEARCH_ID = 90;
494512
static final int ELEMENT_B_ID = 91;
@@ -509,12 +527,134 @@ static int tagNameToID(String tagName) {
509527
static final int ELEMENT_MATH_ID = 106;
510528
static final int ELEMENT_SVG_ID = 107;
511529
static final int ELEMENT_RUBY_ID = 108;
512-
//
513530
static final int ELEMENT_SPAN_ID = 109;
514531
static final int ELEMENT_SUB_ID = 110;
515532
static final int ELEMENT_SUP_ID = 111;
516533
static final int ELEMENT_VAR_ID = 112;
517534

535+
// Reverse lookup table: the index is an ELEMENT_*_ID constant, the value is the tag name.
// NOTE(review): 113 appears to be ELEMENT_VAR_ID (112) + 1 -- keep the size in sync when new IDs are added.
private static final String[] ID_TO_TAGNAME = new String[113];
536+
// Populates ID_TO_TAGNAME so that each ELEMENT_*_ID index holds the matching lowercase tag name.
// NOTE(review): presumably this must mirror the name -> ID cases in tagNameToID; confirm when adding elements.
static {
537+
ID_TO_TAGNAME[ELEMENT_ADDRESS_ID] = "address";
538+
ID_TO_TAGNAME[ELEMENT_APPLET_ID] = "applet";
539+
ID_TO_TAGNAME[ELEMENT_AREA_ID] = "area";
540+
ID_TO_TAGNAME[ELEMENT_ARTICLE_ID] = "article";
541+
ID_TO_TAGNAME[ELEMENT_ASIDE_ID] = "aside";
542+
ID_TO_TAGNAME[ELEMENT_BASE_ID] = "base";
543+
ID_TO_TAGNAME[ELEMENT_BASEFONT_ID] = "basefont";
544+
ID_TO_TAGNAME[ELEMENT_BGSOUND_ID] = "bgsound";
545+
ID_TO_TAGNAME[ELEMENT_BLOCKQUOTE_ID] = "blockquote";
546+
ID_TO_TAGNAME[ELEMENT_BODY_ID] = "body";
547+
ID_TO_TAGNAME[ELEMENT_BR_ID] = "br";
548+
ID_TO_TAGNAME[ELEMENT_BUTTON_ID] = "button";
549+
ID_TO_TAGNAME[ELEMENT_CAPTION_ID] = "caption";
550+
ID_TO_TAGNAME[ELEMENT_CENTER_ID] = "center";
551+
ID_TO_TAGNAME[ELEMENT_COL_ID] = "col";
552+
ID_TO_TAGNAME[ELEMENT_COLGROUP_ID] = "colgroup";
553+
ID_TO_TAGNAME[ELEMENT_DD_ID] = "dd";
554+
ID_TO_TAGNAME[ELEMENT_DETAILS_ID] = "details";
555+
ID_TO_TAGNAME[ELEMENT_DIR_ID] = "dir";
556+
ID_TO_TAGNAME[ELEMENT_DIV_ID] = "div";
557+
ID_TO_TAGNAME[ELEMENT_DL_ID] = "dl";
558+
ID_TO_TAGNAME[ELEMENT_DT_ID] = "dt";
559+
ID_TO_TAGNAME[ELEMENT_EMBED_ID] = "embed";
560+
ID_TO_TAGNAME[ELEMENT_FIELDSET_ID] = "fieldset";
561+
ID_TO_TAGNAME[ELEMENT_FIGCAPTION_ID] = "figcaption";
562+
ID_TO_TAGNAME[ELEMENT_FIGURE_ID] = "figure";
563+
ID_TO_TAGNAME[ELEMENT_FOOTER_ID] = "footer";
564+
ID_TO_TAGNAME[ELEMENT_FORM_ID] = "form";
565+
ID_TO_TAGNAME[ELEMENT_FRAME_ID] = "frame";
566+
ID_TO_TAGNAME[ELEMENT_FRAMESET_ID] = "frameset";
567+
ID_TO_TAGNAME[ELEMENT_H1_ID] = "h1";
568+
ID_TO_TAGNAME[ELEMENT_H2_ID] = "h2";
569+
ID_TO_TAGNAME[ELEMENT_H3_ID] = "h3";
570+
ID_TO_TAGNAME[ELEMENT_H4_ID] = "h4";
571+
ID_TO_TAGNAME[ELEMENT_H5_ID] = "h5";
572+
ID_TO_TAGNAME[ELEMENT_H6_ID] = "h6";
573+
ID_TO_TAGNAME[ELEMENT_HEAD_ID] = "head";
574+
ID_TO_TAGNAME[ELEMENT_HEADER_ID] = "header";
575+
ID_TO_TAGNAME[ELEMENT_HGROUP_ID] = "hgroup";
576+
ID_TO_TAGNAME[ELEMENT_HR_ID] = "hr";
577+
ID_TO_TAGNAME[ELEMENT_HTML_ID] = "html";
578+
ID_TO_TAGNAME[ELEMENT_IFRAME_ID] = "iframe";
579+
ID_TO_TAGNAME[ELEMENT_IMG_ID] = "img";
580+
ID_TO_TAGNAME[ELEMENT_INPUT_ID] = "input";
581+
ID_TO_TAGNAME[ELEMENT_LI_ID] = "li";
582+
ID_TO_TAGNAME[ELEMENT_LINK_ID] = "link";
583+
ID_TO_TAGNAME[ELEMENT_LISTING_ID] = "listing";
584+
ID_TO_TAGNAME[ELEMENT_MAIN_ID] = "main";
585+
ID_TO_TAGNAME[ELEMENT_MARQUEE_ID] = "marquee";
586+
ID_TO_TAGNAME[ELEMENT_MENU_ID] = "menu";
587+
ID_TO_TAGNAME[ELEMENT_META_ID] = "meta";
588+
ID_TO_TAGNAME[ELEMENT_NAV_ID] = "nav";
589+
ID_TO_TAGNAME[ELEMENT_NOEMBED_ID] = "noembed";
590+
ID_TO_TAGNAME[ELEMENT_NOFRAMES_ID] = "noframes";
591+
ID_TO_TAGNAME[ELEMENT_NOSCRIPT_ID] = "noscript";
592+
ID_TO_TAGNAME[ELEMENT_OBJECT_ID] = "object";
593+
ID_TO_TAGNAME[ELEMENT_OL_ID] = "ol";
594+
ID_TO_TAGNAME[ELEMENT_P_ID] = "p";
595+
ID_TO_TAGNAME[ELEMENT_PARAM_ID] = "param";
596+
ID_TO_TAGNAME[ELEMENT_PLAINTEXT_ID] = "plaintext";
597+
ID_TO_TAGNAME[ELEMENT_PRE_ID] = "pre";
598+
ID_TO_TAGNAME[ELEMENT_SCRIPT_ID] = "script";
599+
ID_TO_TAGNAME[ELEMENT_SECTION_ID] = "section";
600+
ID_TO_TAGNAME[ELEMENT_SELECT_ID] = "select";
601+
ID_TO_TAGNAME[ELEMENT_SOURCE_ID] = "source";
602+
ID_TO_TAGNAME[ELEMENT_STYLE_ID] = "style";
603+
ID_TO_TAGNAME[ELEMENT_SUMMARY_ID] = "summary";
604+
ID_TO_TAGNAME[ELEMENT_TABLE_ID] = "table";
605+
ID_TO_TAGNAME[ELEMENT_TBODY_ID] = "tbody";
606+
ID_TO_TAGNAME[ELEMENT_TD_ID] = "td";
607+
ID_TO_TAGNAME[ELEMENT_TEMPLATE_ID] = "template";
608+
ID_TO_TAGNAME[ELEMENT_TEXTAREA_ID] = "textarea";
609+
ID_TO_TAGNAME[ELEMENT_TFOOT_ID] = "tfoot";
610+
ID_TO_TAGNAME[ELEMENT_TH_ID] = "th";
611+
ID_TO_TAGNAME[ELEMENT_THEAD_ID] = "thead";
612+
ID_TO_TAGNAME[ELEMENT_TITLE_ID] = "title";
613+
ID_TO_TAGNAME[ELEMENT_TR_ID] = "tr";
614+
ID_TO_TAGNAME[ELEMENT_TRACK_ID] = "track";
615+
ID_TO_TAGNAME[ELEMENT_UL_ID] = "ul";
616+
ID_TO_TAGNAME[ELEMENT_WBR_ID] = "wbr";
617+
ID_TO_TAGNAME[ELEMENT_XMP_ID] = "xmp";
618+
ID_TO_TAGNAME[ELEMENT_OPTGROUP_ID] = "optgroup";
619+
ID_TO_TAGNAME[ELEMENT_OPTION_ID] = "option";
620+
ID_TO_TAGNAME[ELEMENT_RB_ID] = "rb";
621+
ID_TO_TAGNAME[ELEMENT_RP_ID] = "rp";
622+
ID_TO_TAGNAME[ELEMENT_RT_ID] = "rt";
623+
ID_TO_TAGNAME[ELEMENT_RTC_ID] = "rtc";
624+
ID_TO_TAGNAME[ELEMENT_A_ID] = "a";
625+
ID_TO_TAGNAME[ELEMENT_DIALOG_ID] = "dialog";
626+
ID_TO_TAGNAME[ELEMENT_SEARCH_ID] = "search";
627+
ID_TO_TAGNAME[ELEMENT_B_ID] = "b";
628+
ID_TO_TAGNAME[ELEMENT_BIG_ID] = "big";
629+
ID_TO_TAGNAME[ELEMENT_CODE_ID] = "code";
630+
ID_TO_TAGNAME[ELEMENT_EM_ID] = "em";
631+
ID_TO_TAGNAME[ELEMENT_FONT_ID] = "font";
632+
ID_TO_TAGNAME[ELEMENT_I_ID] = "i";
633+
ID_TO_TAGNAME[ELEMENT_S_ID] = "s";
634+
ID_TO_TAGNAME[ELEMENT_SMALL_ID] = "small";
635+
ID_TO_TAGNAME[ELEMENT_STRIKE_ID] = "strike";
636+
ID_TO_TAGNAME[ELEMENT_STRONG_ID] = "strong";
637+
ID_TO_TAGNAME[ELEMENT_TT_ID] = "tt";
638+
ID_TO_TAGNAME[ELEMENT_U_ID] = "u";
639+
ID_TO_TAGNAME[ELEMENT_NO_BR_ID] = "nobr";
640+
ID_TO_TAGNAME[ELEMENT_KEYGEN_ID] = "keygen";
641+
ID_TO_TAGNAME[ELEMENT_IMAGE_ID] = "image";
642+
ID_TO_TAGNAME[ELEMENT_MATH_ID] = "math";
643+
ID_TO_TAGNAME[ELEMENT_SVG_ID] = "svg";
644+
ID_TO_TAGNAME[ELEMENT_RUBY_ID] = "ruby";
645+
ID_TO_TAGNAME[ELEMENT_SPAN_ID] = "span";
646+
ID_TO_TAGNAME[ELEMENT_SUB_ID] = "sub";
647+
ID_TO_TAGNAME[ELEMENT_SUP_ID] = "sup";
648+
ID_TO_TAGNAME[ELEMENT_VAR_ID] = "var";
649+
}
650+
651+
/**
 * Looks up the tag name stored in {@code ID_TO_TAGNAME} for a numeric element ID.
 *
 * @param id the element ID to resolve
 * @return the table entry at {@code id}, or {@code null} when {@code id} is
 *         zero, negative, or not below the table length
 */
static String getTagNameFromID(int id) {
652+
// Index 0 is deliberately excluded: only strictly positive IDs are looked up.
if (id > 0 && id < ID_TO_TAGNAME.length) {
653+
return ID_TO_TAGNAME[id];
654+
}
655+
return null;
656+
}
657+
518658
static boolean isSpecialCategory(Element element) {
519659
String nodeName = element.nodeName;
520660
int nodeNameID = element.nodeNameID;

0 commit comments

Comments
 (0)