Skip to content

Commit 5765b3d

Browse files
feat(telegram): route bot commands to slash-command handlers (chat@4.31 9c936f8) (#154)
* feat(telegram): route bot commands to slash-command handlers (chat@4.31 9c936f8) Port upstream Telegram slash-command routing. process_update now gates a new handle_slash_command_update before handle_incoming_message_update, but only for update.message (not edited messages or channel posts), so a leading /command routes to the core process_slash_command surface instead of process_message. parse_slash_command requires a bot_command entity at offset 0 (an offset-7 command stays a regular message), matches @bot targeting case-insensitively against user_name (/ping@otherbot is ignored), reads the caption/caption_entities path when text is absent (has_text = text is not None, so empty-string text still takes the text branch), and splits the command from its trailing text on UTF-16-LE code-unit offsets via a new _slice_utf16 helper rather than naive str slicing. The event mirrors the Discord adapter: SlashCommandEvent(channel=None) with the resolved thread id attached as channel_id for Chat to re-wrap. Adds 7 regression tests (routes command, routes caption, ignores otherbot, only-leading-entity, empty-string-text branch, UTF-16 split, naive-slice divergence guard). * test(telegram): de-phantom UTF-16 split test, add slash-routing coverage Make test_trailing_text_split_uses_utf16_offsets genuinely exercise the UTF-16 code-unit split. The prior input ("/p😀g hello") converged: a separating space meant naive code-point slicing and UTF-16-LE offset slicing both yielded "hello" after lstrip, so the test passed against a naive text[entity_length:] implementation. The new input ("/p😀ghello") abuts the trailing text with no separator, so the naive slice over-advances and drops the leading "h" ("ello"), while the UTF-16-aware split keeps it ("hello") — verified to FAIL against a reverted naive slice. 
Add targeted regression coverage, each pinned to a distinct mutation: - @bot targeting is case-insensitive (/ping@MyBot routes when user_name is "mybot"); fails under a case-sensitive comparison. - slash gating is scoped to update.message only: edited_message and channel_post carrying a leading bot_command entity route to process_message, never the slash handler; fails if the gate widens to message_update. - empty-command guard: /@Mybot and a bare / yield no slash command and route to process_message (matches upstream's if (!commandName)); fails if the guard is removed.
1 parent f801985 commit 5765b3d

2 files changed

Lines changed: 426 additions & 1 deletion

File tree

src/chat_sdk/adapters/telegram/adapter.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
PostableMarkdown,
8484
RawMessage,
8585
ReactionEvent,
86+
SlashCommandEvent,
8687
StreamChunk,
8788
StreamOptions,
8889
ThreadInfo,
@@ -1049,7 +1050,15 @@ def process_update(
10491050
or update.get("edited_channel_post")
10501051
)
10511052

1052-
if message_update:
1053+
# Slash commands are gated to fresh ``message`` updates only — edited
1054+
# messages and channel posts never route to the slash-command
1055+
# handlers. ``handle_slash_command_update`` returns ``True`` when it
1056+
# consumed the update, in which case the regular message path is
1057+
# skipped (mirrors upstream ``messageUpdate && !handledSlashCommand``).
1058+
message = update.get("message")
1059+
handled_slash_command = message is not None and self.handle_slash_command_update(message, options)
1060+
1061+
if message_update and not handled_slash_command:
10531062
self.handle_incoming_message_update(message_update, options)
10541063

10551064
if update.get("callback_query"):
@@ -1079,6 +1088,103 @@ def handle_incoming_message_update(
10791088

10801089
self._chat.process_message(self, thread_id, parsed_message, options)
10811090

1091+
def handle_slash_command_update(
    self,
    telegram_message: TelegramMessage,
    options: WebhookOptions | None = None,
) -> bool:
    """Dispatch a message that opens with ``/command`` as a slash command.

    The message is run through :meth:`parse_slash_command`. When that
    yields a command, the message is additionally parsed and cached the
    same way this method's other calls do (``parse_telegram_message`` then
    ``cache_message``), and a ``SlashCommandEvent`` is passed to
    ``self._chat.process_slash_command`` together with ``options``.

    The event is constructed with ``channel=None``; the encoded thread ID
    is attached afterwards as ``channel_id`` so that ``Chat`` can fill in
    the real channel before handlers run.

    Returns:
        ``True`` when the message was consumed as a slash command, so the
        caller should skip the regular message path. ``False`` when no
        chat is attached or the message is not a slash command.
    """
    # Without an attached chat there is nowhere to deliver the event.
    if not self._chat:
        return False

    parsed_command = self.parse_slash_command(telegram_message)
    if not parsed_command:
        return False

    # Build the thread key from the chat ID and the optional
    # ``message_thread_id`` field, then encode it.
    thread_key = TelegramThreadId(
        chat_id=str(telegram_message["chat"]["id"]),
        message_thread_id=telegram_message.get("message_thread_id"),
    )
    encoded_thread_id = self.encode_thread_id(thread_key)

    # The author on the event comes from the parsed message, which is
    # cached before the event is dispatched.
    incoming = self.parse_telegram_message(telegram_message, encoded_thread_id)
    self.cache_message(incoming)

    slash_event = SlashCommandEvent(
        adapter=self,
        channel=None,  # pyrefly: ignore[bad-argument-type]  # filled in by Chat
        user=incoming.author,
        command=parsed_command["command"],
        text=parsed_command["text"],
        raw=telegram_message,
    )
    slash_event.channel_id = encoded_thread_id  # type: ignore[attr-defined]

    self._chat.process_slash_command(slash_event, options)
    return True
1134+
1135+
def parse_slash_command(
    self,
    telegram_message: TelegramMessage,
) -> dict[str, str] | None:
    """Pull a leading ``/command`` and its trailing text out of a message.

    The message's ``text``/``entities`` pair is used whenever ``text`` is
    present (``is not None`` — so an empty-string ``text`` still selects
    the text branch and then yields ``None``); otherwise the
    ``caption``/``caption_entities`` pair is used.

    A command is recognised only when there is a ``bot_command`` entity at
    offset 0. The entity text must start with ``/`` and have a non-empty
    name. If it carries an ``@target`` suffix, the target must equal
    :attr:`_user_name` ignoring case; otherwise the message is not treated
    as a command for this bot.

    Returns:
        ``{"command": "/<name>", "text": <trailing text>}`` where the
        trailing text is everything after the entity (split via
        :meth:`_slice_utf16`) with leading whitespace removed, or ``None``
        when the message is not a slash command for this bot.
    """
    raw_text = telegram_message.get("text")
    if raw_text is not None:
        body = raw_text
        entity_list = telegram_message.get("entities") or []
    else:
        body = telegram_message.get("caption")
        entity_list = telegram_message.get("caption_entities") or []

    if not body:
        return None

    # Only a bot_command entity sitting at the very start counts; a missing
    # ``offset`` key is treated as 0.
    leading_command = None
    for candidate in entity_list:
        if candidate.get("type") == "bot_command" and candidate.get("offset", 0) == 0:
            leading_command = candidate
            break
    if not leading_command:
        return None

    token = self.entity_text(body, leading_command)
    if not token.startswith("/"):
        return None

    # Split "name@target" on the first "@"; ``addressee`` is empty when
    # there is no "@" or nothing follows it.
    name, _, addressee = token[1:].partition("@")
    if not name:
        return None
    if addressee and addressee.lower() != self._user_name.lower():
        return None

    end_of_command = leading_command.get("offset", 0) + leading_command.get("length", 0)
    remainder = self._slice_utf16(body, end_of_command).lstrip()

    return {"command": f"/{name}", "text": remainder}
1187+
10821188
def handle_callback_query(
10831189
self,
10841190
callback_query: TelegramCallbackQuery,
@@ -2493,6 +2599,18 @@ def entity_text(self, text: str, entity: TelegramMessageEntity) -> str:
24932599
utf16 = text.encode("utf-16-le")
24942600
return utf16[offset * 2 : (offset + length) * 2].decode("utf-16-le")
24952601

2602+
@staticmethod
def _slice_utf16(text: str, offset: int) -> str:
    """Return the tail of ``text`` starting at a UTF-16 code-unit ``offset``.

    Telegram entity offsets are counted in UTF-16 code units (the same
    unit JavaScript ``String.prototype.slice`` uses), so a code point
    outside the Basic Multilingual Plane — an emoji, for example — takes
    up two offset positions. Plain Python ``str`` slicing counts code
    points and would cut such text in the wrong place. The text is
    therefore encoded as UTF-16-LE (two bytes per code unit), sliced at
    the byte level, and decoded again.
    """
    encoded = text.encode("utf-16-le")
    byte_start = 2 * offset  # two bytes per UTF-16 code unit
    return encoded[byte_start:].decode("utf-16-le")
2613+
24962614
@staticmethod
24972615
def escape_regex(input_str: str) -> str:
24982616
"""Escape regex special characters."""

0 commit comments

Comments
 (0)