Skip to content

Commit 12730a5

Browse files
feat(telegram): inbound rich-message parsing (chat@4.31 4662309 — TG3/4)
Port the 4.31 inbound parse path for Bot API 10.1 rich messages.

parse_telegram_message:
- Render an inbound `rich_message` to markdown (rich_message_to_markdown) and plain text (rich_message_to_text) via the TG2 renderer.
- Faithful ?/??/|| precedence:
  - rich_markdown: presence (truthy) check on raw.rich_message
  - plain_text: `content?.text ?? raw.text ?? raw.caption ?? (rich ? richMessageToText : "")` -- nullish chain ported as `is not None` (empty string short-circuits)
  - text: `content?.text ? content.text : applyTelegramEntities(...)` -- TRUTHY `?`, empty content.text falls through to entities
  - formatted: `content?.formatted ?? toAst(richMarkdown || text)` -- nullish for the supplied AST, TRUTHY-OR for richMarkdown || text
- New optional `content` arg (TelegramParsedContent) wires the outbound pre-rendered AST/text reuse; inbound callers pass None.

extract_attachments:
- Append media extracted from rich_message via rich_message_media, mapping each RichMedia onto Attachment (mimeType -> mime_type boundary), appended after the top-level photo/video/audio/voice/document/video_note.

Tests (tests/test_telegram_webhook.py): inbound rich text+AST, caption precedence, rich-markdown-driven formatted, nested collage media (photo largest size, video name/mime), animation image-vs-video by mime, voice_note -> audio, list-block recursion, append ordering.
1 parent 01e5e6e commit 12730a5

2 files changed

Lines changed: 390 additions & 4 deletions

File tree

src/chat_sdk/adapters/telegram/adapter.py

Lines changed: 77 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,11 @@
2929
empty_telegram_inline_keyboard,
3030
)
3131
from chat_sdk.adapters.telegram.format_converter import TelegramFormatConverter
32+
from chat_sdk.adapters.telegram.rich import (
33+
rich_message_media,
34+
rich_message_to_markdown,
35+
rich_message_to_text,
36+
)
3237
from chat_sdk.adapters.telegram.types import (
3338
TelegramAdapterConfig,
3439
TelegramApiResponse,
@@ -156,6 +161,20 @@ class ResolvedTelegramLongPollingConfig:
156161
timeout: int
157162

158163

164+
@dataclass
165+
class TelegramParsedContent:
166+
"""Pre-resolved formatted/plain text for a parsed Telegram message.
167+
168+
Supplied by the outbound post/edit/stream paths when the SDK already
169+
rendered a rich message locally, so :meth:`parse_telegram_message`
170+
reuses that AST/text instead of re-deriving it from the raw payload.
171+
Port of upstream's ``content?`` argument to ``parseTelegramMessage``.
172+
"""
173+
174+
formatted: FormattedContent
175+
text: str
176+
177+
159178
TelegramRuntimeMode = str # "webhook" | "polling"
160179

161180
_T = TypeVar("_T")
@@ -1975,11 +1994,37 @@ def parse_telegram_message(
19751994
self,
19761995
raw: TelegramMessage,
19771996
thread_id: str,
1997+
content: TelegramParsedContent | None = None,
19781998
) -> Message:
19791999
"""Parse a Telegram message into a normalised :class:`Message`."""
1980-
plain_text = raw.get("text") or raw.get("caption") or ""
1981-
entities = raw.get("entities") or raw.get("caption_entities") or []
1982-
text = apply_telegram_entities(plain_text, entities)
2000+
rich_message = raw.get("rich_message")
2001+
# `raw.rich_message ? ... : ""` -- presence (truthy) check on the field.
2002+
rich_markdown = rich_message_to_markdown(rich_message) if rich_message else ""
2003+
# Upstream chains `??` here (nullish): an empty-string text/caption is a
2004+
# real value and short-circuits the chain. Port each `??` as `is not None`.
2005+
content_text = content.text if content is not None else None
2006+
raw_text = raw.get("text")
2007+
raw_caption = raw.get("caption")
2008+
if content_text is not None:
2009+
plain_text = content_text
2010+
elif raw_text is not None:
2011+
plain_text = raw_text
2012+
elif raw_caption is not None:
2013+
plain_text = raw_caption
2014+
elif rich_message:
2015+
plain_text = rich_message_to_text(rich_message)
2016+
else:
2017+
plain_text = ""
2018+
# `raw.entities ?? raw.caption_entities ?? []` -- nullish: present-but-empty
2019+
# entity lists are honoured rather than falling through to caption_entities.
2020+
entities = raw.get("entities")
2021+
if entities is None:
2022+
entities = raw.get("caption_entities")
2023+
if entities is None:
2024+
entities = []
2025+
# `content?.text ? content.text : applyTelegramEntities(...)` -- TRUTHY `?`:
2026+
# a present-but-empty `content.text` falls through to the entity-applied text.
2027+
text = content.text if content is not None and content.text else apply_telegram_entities(plain_text, entities)
19832028

19842029
# Determine author -- Telegram uses 'from' key which is a reserved word
19852030
from_user = cast(
@@ -2004,11 +2049,19 @@ def parse_telegram_message(
20042049

20052050
edit_date = raw.get("edit_date")
20062051

2052+
# `content?.formatted ?? this.formatConverter.toAst(richMarkdown || text)`:
2053+
# `??` (nullish) for the supplied AST, then `richMarkdown || text` is a
2054+
# TRUTHY-OR -- a non-empty rendered rich markdown wins, else fall to text.
2055+
if content is not None and content.formatted is not None:
2056+
formatted = content.formatted
2057+
else:
2058+
formatted = self._format_converter.to_ast(rich_markdown or text)
2059+
20072060
return Message(
20082061
id=self.encode_message_id(str(raw["chat"]["id"]), raw["message_id"]),
20092062
thread_id=thread_id,
20102063
text=text,
2011-
formatted=self._format_converter.to_ast(text),
2064+
formatted=formatted,
20122065
raw=raw,
20132066
author=author,
20142067
metadata=MessageMetadata(
@@ -2104,6 +2157,26 @@ def extract_attachments(self, raw: TelegramMessage) -> list[Attachment]:
21042157
)
21052158
)
21062159

2160+
# Bot API 10.1 rich messages carry their media inline in the block tree
2161+
# (port of chat@4.31 4662309). `rich_message_media` walks the nested
2162+
# blocks (lists, blockquotes, collages, slideshows, details) and yields
2163+
# a flat `RichMedia` list; map each onto our `Attachment` shape. The
2164+
# `mimeType` -> `mime_type` rename is the only field-name boundary.
2165+
rich_message = raw.get("rich_message")
2166+
if rich_message:
2167+
for media in rich_message_media(rich_message):
2168+
attachments.append(
2169+
self.create_attachment(
2170+
media.type,
2171+
media.file["file_id"],
2172+
size=media.file.get("file_size"),
2173+
width=media.width,
2174+
height=media.height,
2175+
name=media.name,
2176+
mime_type=media.mime_type,
2177+
)
2178+
)
2179+
21072180
return attachments
21082181

21092182
def create_attachment(

0 commit comments

Comments
 (0)