Skip to content

Commit ab8d1ad

Browse files
committed
Added 4th alternative. Manually coded )
1 parent cfa288a commit ab8d1ad

1 file changed

Lines changed: 123 additions & 0 deletions

File tree

telebot/formatting.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# "deepseek" - deepseek version
1212
# "gemini" - gemini version
1313
# "chatgpt" - chatgpt version
14+
# "coder" - @coder2020official version
1415
# other values - original version
1516
ENTITY_PARSER_MODE = None
1617

@@ -411,6 +412,8 @@ def apply_html_entities(text: str, entities: Optional[List], custom_subs: Option
411412
return apply_html_entities_gm(text, entities, custom_subs)
412413
elif ENTITY_PARSER_MODE == "chatgpt":
413414
return apply_html_entities_cg(text, entities, custom_subs)
415+
elif ENTITY_PARSER_MODE == "coder":
416+
return apply_html_entities_coder(text, entities, custom_subs)
414417

415418
if not entities:
416419
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
@@ -1035,3 +1038,123 @@ def apply_html_entities_cg(
10351038
tree = build_tree(entities, mapping)
10361039
return render(text, tree, custom_subs)
10371040
#endregion
1041+
1042+
def apply_html_entities_coder(text: str, entities=None, custom_subs=None) -> str:
    """
    Render ``text`` as Telegram-style HTML according to ``entities``.

    Entity offsets and lengths are interpreted as UTF-16 code units, so the
    text is sliced in its UTF-16-LE encoded form (two bytes per code unit).
    Properly nested entities produce nested tags. Entities that only partially
    overlap are split at the boundary of the enclosing entity so the resulting
    markup stays well formed and no text is emitted twice.

    :param text: plain message text.
    :param entities: iterable of objects exposing ``type``, ``offset`` and
        ``length``; ``url``, ``user``, ``custom_emoji_id`` and ``language``
        are read when present. ``None`` or empty means "no formatting".
    :param custom_subs: optional mapping ``entity type -> template``. Templates
        are rendered with ``str.format`` and may use the ``{text}``, ``{url}``
        and ``{custom_emoji_id}`` placeholders. Entries override the defaults.
    :return: the HTML-escaped text with entity markup applied.
    """
    if not entities:
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    _subs = {
        "bold": "<b>{text}</b>",
        "italic": "<i>{text}</i>",
        "pre": "<pre>{text}</pre>",
        "code": "<code>{text}</code>",
        "text_link": "<a href=\"{url}\">{text}</a>",
        "strikethrough": "<s>{text}</s>",
        "underline": "<u>{text}</u>",
        "spoiler": "<span class=\"tg-spoiler\">{text}</span>",
        "custom_emoji": "<tg-emoji emoji-id=\"{custom_emoji_id}\">{text}</tg-emoji>",
        "blockquote": "<blockquote>{text}</blockquote>",
        "expandable_blockquote": "<blockquote expandable>{text}</blockquote>",
    }
    if custom_subs:
        _subs.update(custom_subs)

    # Offsets are UTF-16 code units; in UTF-16-LE every unit is exactly 2 bytes.
    utf16_text = text.encode("utf-16-le")

    def escape_html(chunk):
        """Decode a UTF-16-LE byte slice and escape HTML special characters."""
        decoded = chunk.decode("utf-16-le")
        return decoded.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def format_entity(entity, content):
        """Wrap already-escaped ``content`` in the markup for ``entity``."""
        entity_type = entity.type
        url = getattr(entity, "url", None)

        if entity_type == "text_mention":
            user = getattr(entity, "user", None)
            if user is not None:
                # A text mention is rendered as a link to the user's profile,
                # through the (possibly customised) text_link template.
                entity_type = "text_link"
                url = f"tg://user?id={user.id}"
        elif entity_type == "pre" and getattr(entity, "language", None):
            return f"<pre><code class=\"language-{entity.language}\">{content}</code></pre>"

        # Plain "mention" entities have no template on purpose: "@username"
        # is left as ordinary text.
        template = _subs.get(entity_type)
        if not template:
            return content
        # str.format ignores unused keyword arguments, so every template may
        # pick whichever placeholders it needs.
        return template.format(
            text=content,
            url=url,
            custom_emoji_id=getattr(entity, "custom_emoji_id", None),
        )

    def span_key(span):
        """Order spans by start; for equal starts the longer span goes first."""
        return span[0], -span[1]

    def render(spans, lo, hi):
        """Render bytes ``[lo, hi)`` applying ``spans`` (sorted by ``span_key``)."""
        parts = []
        cursor = lo
        pending = list(spans)
        while pending:
            (start, end, entity), rest = pending[0], pending[1:]
            if end <= cursor or start >= hi:
                # Entity lies outside the window: skip it but keep the others.
                pending = rest
                continue
            start = max(start, cursor)
            end = min(end, hi)
            if start > cursor:
                parts.append(escape_html(utf16_text[cursor:start]))

            children = []
            siblings = []
            for child_start, child_end, child_entity in rest:
                if child_start >= end:
                    siblings.append((child_start, child_end, child_entity))
                elif child_end > end:
                    # Partial overlap: keep the part inside the current entity
                    # nested and continue the remainder right after it.
                    children.append((child_start, end, child_entity))
                    siblings.append((end, child_end, child_entity))
                else:
                    children.append((child_start, child_end, child_entity))
            # Split remainders may have been appended out of order.
            siblings.sort(key=span_key)

            parts.append(format_entity(entity, render(children, start, end)))
            cursor = end
            pending = siblings

        if cursor < hi:
            parts.append(escape_html(utf16_text[cursor:hi]))
        return "".join(parts)

    all_spans = sorted(
        ((e.offset * 2, (e.offset + e.length) * 2, e) for e in entities),
        key=span_key,
    )
    return render(all_spans, 0, len(utf16_text))

0 commit comments

Comments
 (0)