|
11 | 11 | # "deepseek" - deepseek version |
12 | 12 | # "gemini" - gemini version |
13 | 13 | # "chatgpt" - chatgpt version |
| 14 | +# "coder" - @coder2020official version |
14 | 15 | # other values - original version |
15 | 16 | ENTITY_PARSER_MODE = None |
16 | 17 |
|
@@ -411,6 +412,8 @@ def apply_html_entities(text: str, entities: Optional[List], custom_subs: Option |
411 | 412 | return apply_html_entities_gm(text, entities, custom_subs) |
412 | 413 | elif ENTITY_PARSER_MODE == "chatgpt": |
413 | 414 | return apply_html_entities_cg(text, entities, custom_subs) |
| 415 | + elif ENTITY_PARSER_MODE == "coder": |
| 416 | + return apply_html_entities_coder(text, entities, custom_subs) |
414 | 417 |
|
415 | 418 | if not entities: |
416 | 419 | return text.replace("&", "&").replace("<", "<").replace(">", ">") |
@@ -1035,3 +1038,123 @@ def apply_html_entities_cg( |
1035 | 1038 | tree = build_tree(entities, mapping) |
1036 | 1039 | return render(text, tree, custom_subs) |
1037 | 1040 | #endregion |
| 1041 | + |
def apply_html_entities_coder(text: str, entities=None, custom_subs=None) -> str:
    """
    Render ``text`` as HTML by wrapping each entity's span in its tag.

    Entity offsets and lengths are interpreted as UTF-16 code units, so the
    text is sliced in its UTF-16-LE encoded form (two bytes per unit).
    Nested entities are rendered inside their parent. An entity that only
    partially overlaps another one is split at the boundary so that the
    output stays well-formed and no text is emitted twice.

    :param text: plain message text.
    :param entities: iterable of objects exposing ``type``, ``offset`` and
        ``length``; ``url``, ``user``, ``language`` and ``custom_emoji_id``
        are read when present. ``None`` or empty means "no formatting".
    :param custom_subs: optional mapping ``entity type -> template`` that
        overrides or extends the default templates. Templates may use the
        ``{text}``, ``{url}``, ``{custom_emoji_id}`` and ``{language}``
        placeholders.
    :return: the HTML-escaped text with entity markup applied.
    """

    def escape_html(value):
        """Escape ``&``, ``<`` and ``>``; UTF-16-LE bytes are decoded first."""
        if isinstance(value, bytes):
            value = value.decode("utf-16-le")
        # "&" must be replaced first so the entities produced by the next
        # two replacements are not escaped a second time.
        return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def escape_attr(value):
        """Escape a value for use inside a double-quoted HTML attribute."""
        if value is None:
            return ""
        return escape_html(str(value)).replace('"', "&quot;")

    if not entities:
        return escape_html(text)

    _subs = {
        "bold": "<b>{text}</b>",
        "italic": "<i>{text}</i>",
        "pre": "<pre>{text}</pre>",
        "code": "<code>{text}</code>",
        "text_link": "<a href=\"{url}\">{text}</a>",
        "strikethrough": "<s>{text}</s>",
        "underline": "<u>{text}</u>",
        "spoiler": "<span class=\"tg-spoiler\">{text}</span>",
        "custom_emoji": "<tg-emoji emoji-id=\"{custom_emoji_id}\">{text}</tg-emoji>",
        "blockquote": "<blockquote>{text}</blockquote>",
        "expandable_blockquote": "<blockquote expandable>{text}</blockquote>",
    }
    if custom_subs:
        _subs.update(custom_subs)

    utf16_text = text.encode("utf-16-le")
    total_bytes = len(utf16_text)

    def span_order(span):
        """Sort key: earlier start first, longer span first on equal start."""
        return span[0], -span[1]

    # Convert every entity to a (start_byte, end_byte, entity) span clamped to
    # the text. Empty or out-of-range spans are dropped so they cannot disturb
    # the rendering of the remaining entities.
    spans = []
    for entity in entities:
        start = max(entity.offset * 2, 0)
        end = min((entity.offset + entity.length) * 2, total_bytes)
        if start < end:
            spans.append((start, end, entity))
    spans.sort(key=span_order)

    def format_entity(entity, content):
        """Wrap already-escaped ``content`` in the markup for ``entity``."""
        entity_type = entity.type
        user = getattr(entity, "user", None)
        language = getattr(entity, "language", None)

        if entity_type == "text_mention" and user is not None:
            return f"<a href=\"tg://user?id={user.id}\">{content}</a>"
        if entity_type == "pre" and language:
            return (
                f"<pre><code class=\"language-{escape_attr(language)}\">"
                f"{content}</code></pre>"
            )

        # Types without a template (for example a plain "mention", which the
        # original author notes works fine as bare @username) stay unwrapped.
        template = _subs.get(entity_type)
        if not template:
            return content

        # Always supply every placeholder: unused keyword arguments are
        # ignored by str.format, and this lets custom templates for
        # "text_link" / "custom_emoji" take effect.
        return template.format(
            text=content,
            url=escape_attr(getattr(entity, "url", None)),
            custom_emoji_id=escape_attr(getattr(entity, "custom_emoji_id", None)),
            language=escape_attr(language),
        )

    def render(pending, start_pos, end_pos):
        """
        Render bytes ``start_pos:end_pos``.

        ``pending`` holds spans sorted by ``span_order`` that all lie inside
        that range.
        """
        pieces = []
        cursor = start_pos
        while pending:
            (span_start, span_end, entity), rest = pending[0], pending[1:]
            # Plain text between the previous entity and this one.
            pieces.append(escape_html(utf16_text[cursor:span_start]))

            nested, later = [], []
            for other_start, other_end, other in rest:
                if other_start >= span_end:
                    later.append((other_start, other_end, other))
                elif other_end <= span_end:
                    nested.append((other_start, other_end, other))
                else:
                    # Partial overlap: the head is rendered inside the
                    # current entity, the tail continues after it.
                    nested.append((other_start, span_end, other))
                    later.append((span_end, other_end, other))
            # Tails produced by splitting may precede spans already queued.
            later.sort(key=span_order)

            pieces.append(format_entity(entity, render(nested, span_start, span_end)))
            cursor = span_end
            pending = later

        pieces.append(escape_html(utf16_text[cursor:end_pos]))
        return "".join(pieces)

    return render(spans, 0, total_bytes)
0 commit comments