@@ -492,115 +492,3 @@ def process_entities(byte_text, entity_list, start_pos=0, end_pos=None):
492492 html_result = process_entities (utf16_text , sorted_entities )
493493
494494 return html_result
495-
496-
497- #region DeepSeek vibecoding here
class EntityProcessor:
    """
    Handles parsing of text with message entities to HTML.

    Entity offsets and lengths are interpreted as UTF-16 code units. The
    constructor precomputes a table that maps every UTF-16 code unit of the
    text to the index of the Python character it belongs to, so entity ranges
    can be translated to ordinary string slices.

    Attributes:
        text: The message text the entities refer to.
        utf16_mapping: For each UTF-16 code unit of ``text``, the index of the
            character that code unit belongs to.
        total_utf16_units: Length of ``text`` measured in UTF-16 code units.
        custom_subs: Optional mapping of entity type to HTML template that
            supplements or overrides ``ENTITY_TEMPLATES``.
    """

    # Entity type to HTML template mapping
    ENTITY_TEMPLATES = {
        "bold": "<b>{text}</b>",
        "italic": "<i>{text}</i>",
        "pre": "<pre>{text}</pre>",
        "code": "<code>{text}</code>",
        "text_link": "<a href=\"{url}\">{text}</a>",
        "strikethrough": "<s>{text}</s>",
        "underline": "<u>{text}</u>",
        "spoiler": "<span class=\"tg-spoiler\">{text}</span>",
        "custom_emoji": "<tg-emoji emoji-id=\"{custom_emoji_id}\">{text}</tg-emoji>",
        "blockquote": "<blockquote>{text}</blockquote>",
        "expandable_blockquote": "<blockquote expandable>{text}</blockquote>",
    }

    def __init__(self, text: str, custom_subs: Optional[Dict[str, str]] = None):
        """
        Store the text and build the UTF-16 code unit lookup table.

        Args:
            text: The message text.
            custom_subs: Optional entity type -> HTML template mapping. A
                template is a ``str.format`` string that receives ``text``
                (plus ``url`` or ``custom_emoji_id`` for the entity types
                that provide them).
        """
        self.text = text
        self.utf16_mapping = self.utf16_code_units_to_indices(text)
        self.total_utf16_units = len(self.utf16_mapping)
        self.custom_subs = custom_subs

    def check_entity_exists(self, entity_type: str) -> bool:
        """
        Check if an entity type has a defined HTML template, considering custom substitutions.

        Always returns a real ``bool`` (never ``None``), including when no
        custom substitutions were supplied.
        """
        return bool(
            entity_type in self.ENTITY_TEMPLATES
            or (self.custom_subs and entity_type in self.custom_subs)
        )

    def get_entity_template(self, entity_type: str, default: Optional[str] = None) -> Optional[str]:
        """
        Get the HTML template for a given entity type, considering custom substitutions.

        Custom substitutions are consulted first so that they can override a
        built-in template; if the built-ins were checked first, a custom
        entry for a built-in type could never take effect.

        Args:
            entity_type: The entity type to look up.
            default: Value returned when no template is defined.

        Returns:
            The template string, or ``default`` if the type is unknown.
        """
        if self.custom_subs and entity_type in self.custom_subs:
            return self.custom_subs[entity_type]
        return self.ENTITY_TEMPLATES.get(entity_type, default)

    @staticmethod
    def utf16_code_units_to_indices(text: str) -> List[int]:
        """
        Convert UTF-16 code unit positions to Python string indices.

        Returns:
            code_unit_to_char_idx: Mapping from UTF-16 code unit position to character index
        """
        code_unit_to_char_idx: List[int] = []

        for char_idx, char in enumerate(text):
            code_unit_to_char_idx.append(char_idx)
            # Characters outside BMP (U+10000 to U+10FFFF) use 2 UTF-16 code
            # units, so the same character index is recorded a second time.
            if ord(char) >= 0x10000:
                code_unit_to_char_idx.append(char_idx)

        return code_unit_to_char_idx

    def utf16_to_char_index(self, utf16_pos: int) -> int:
        """
        Convert UTF-16 code unit position to character index.

        Positions at or beyond the end of the text map to ``len(self.text)``,
        which makes the result directly usable as an exclusive slice end.
        """
        if utf16_pos >= len(self.utf16_mapping):
            return len(self.text)
        return self.utf16_mapping[utf16_pos]

    def get_entity_text(self, entity) -> str:  # entity: MessageEntity
        """
        Extract the text for an entity using UTF-16 code unit offsets.

        Reads ``entity.offset`` and ``entity.length``.
        """
        start_char = self.utf16_to_char_index(entity.offset)
        end_char = self.utf16_to_char_index(entity.offset + entity.length)
        return self.text[start_char:end_char]

    def create_html_tag(self, entity, content: str) -> str:  # entity: MessageEntity
        """
        Create HTML tag for an entity with the given content.

        Args:
            entity: Object exposing ``type`` and, depending on the type,
                ``url``, ``user``, ``custom_emoji_id`` or ``language``.
            content: Inner content to wrap. It is inserted as-is.

        Returns:
            ``content`` wrapped in the entity's template, or ``content``
            unchanged when no template applies.
        """
        entity_type = entity.type

        # "text_mention" has no template of its own: it is rendered with the
        # "text_link" template. Resolve the template under that key up front,
        # otherwise the lookup fails and mentions are returned unwrapped.
        is_mention = entity_type == "text_mention"
        template = self.get_entity_template("text_link" if is_mention else entity_type)
        if not template:
            return content

        # Prepare format arguments
        format_args = {"text": content}
        if is_mention:
            user = getattr(entity, "user", None)
            if user is None:
                # No user to link to; fall back to the plain content.
                return content
            format_args["url"] = "tg://user?id={0}".format(user.id)
        elif entity_type == "text_link":
            format_args["url"] = escape_html(entity.url or "")
        elif entity_type == "custom_emoji":
            format_args["custom_emoji_id"] = entity.custom_emoji_id or ""
        elif entity_type == "pre" and entity.language:
            # Local import keeps this block self-contained.
            from html import escape as _escape_attr

            # The language lands inside a quoted attribute, so quotes must be
            # escaped as well. The closing </pre> comes from the "pre"
            # template itself and must not be repeated here.
            format_args["text"] = '<code class="language-{}">{}</code>'.format(
                _escape_attr(entity.language, quote=True), content
            )

        return template.format(**format_args)
0 commit comments