From df6e5050d799a76109729ff561446032c06561cc Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:23:19 +0200 Subject: [PATCH] refactor(backup): share service_action_type, document vocabulary divergence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #191 review: - Move service_action_type to message_utils.py beside extract_topic_id (both paths' shared home); drop the now-unused re import in telegram_backup.py. - Docstring notes the acronym snake-case edge (SetMessagesTTL -> set_messages_t_t_l) and that the backfill action_type vocabulary is intentionally distinct from the listener's curated event names — only the raw_data shape is shared, not the values. - Reword the _process_message comment to say structural parity (keys), not value parity. - Add direct unit tests for service_action_type (topic, multi-word, no-arg, acronym edge) and a non-topic MessageActionChatEditTitle / MessageActionPinMessage backfill test. --- src/message_utils.py | 26 ++++++++++++++++++ src/telegram_backup.py | 26 +++++++----------- tests/test_telegram_backup.py | 50 ++++++++++++++++++++++++++++++++++- 3 files changed, 84 insertions(+), 18 deletions(-) diff --git a/src/message_utils.py b/src/message_utils.py index c2e645a2..a2a0e3e7 100644 --- a/src/message_utils.py +++ b/src/message_utils.py @@ -5,6 +5,7 @@ import hashlib import logging import os +import re logger = logging.getLogger(__name__) @@ -287,3 +288,28 @@ def extract_topic_id(message: object) -> int | None: if topic_id is None: topic_id = getattr(message.reply_to, "reply_to_msg_id", None) return topic_id + + +def service_action_type(action: object) -> str: + """Normalize a Telethon ``MessageAction`` class name to a snake_case tag. + + Used by the backup backfill path to label service messages in + ``raw_data.action_type`` (e.g. forum topic creations/renames). + + Examples: ``MessageActionTopicCreate`` -> ``"topic_create"``, + ``MessageActionTopicEdit`` -> ``"topic_edit"``, + ``MessageActionChatEditTitle`` -> ``"chat_edit_title"``. + + Note: consecutive capitals (acronyms) are split letter-by-letter, e.g. + ``MessageActionSetMessagesTTL`` -> ``"set_messages_t_t_l"``. None of the + title-bearing actions we care about are affected; the tag is only a stable, + deterministic identifier and is not parsed back, so this is cosmetic. + + This vocabulary is intentionally distinct from the live listener's curated + event-derived set (``title_changed``, ``user_joined``, ...): the backfill + sees low-level ``MessageAction`` classes while the listener sees high-level + ``events.ChatAction`` flags. Only the ``raw_data`` *shape* is shared, not the + ``action_type`` *values*. + """ + name = type(action).__name__.removeprefix("MessageAction") + return re.sub(r"(? str: - """Normalize a Telethon MessageAction class name to snake_case. - - Examples: MessageActionTopicCreate -> "topic_create", - MessageActionTopicEdit -> "topic_edit", - MessageActionChatEditTitle -> "chat_edit_title". - """ - name = type(action).__name__.removeprefix("MessageAction") - return re.sub(r"(? dict: } # Preserve service-action metadata (e.g. forum topic creations and - # renames) so historical backfills keep parity with the listener's - # raw_data convention (service_type / action_type, since v6.0.0). - # Without this, service events are stored without their payload and - # the information is irrecoverable once the history is archived. + # renames) so historical backfills carry the same raw_data *shape* as + # the live listener (service_type / action_type / new_title). The + # action_type *vocabulary* differs by design: the backfill derives it + # from low-level MessageAction class names (chat_edit_title, ...) while + # the listener uses curated event names (title_changed, ...) — only the + # keys are shared, not the values. Without this, service events are + # stored without their payload and are irrecoverable once archived. action = getattr(message, "action", None) if action is not None: message_data["raw_data"]["service_type"] = "service" - message_data["raw_data"]["action_type"] = _service_action_type(action) + message_data["raw_data"]["action_type"] = service_action_type(action) action_title = getattr(action, "title", None) if action_title is not None: message_data["raw_data"]["new_title"] = self._text_with_entities_to_string(action_title) diff --git a/tests/test_telegram_backup.py b/tests/test_telegram_backup.py index 4d6986e8..81a79366 100644 --- a/tests/test_telegram_backup.py +++ b/tests/test_telegram_backup.py @@ -12,6 +12,8 @@ from telethon.tl.types import ( Channel, Chat, + MessageActionChatEditTitle, + MessageActionPinMessage, MessageActionTopicCreate, MessageActionTopicEdit, MessageMediaContact, @@ -23,7 +25,7 @@ User, ) -from src.message_utils import extract_topic_id +from src.message_utils import extract_topic_id, service_action_type from src.telegram_backup import TelegramBackup @@ -744,6 +746,34 @@ def test_returns_none_when_both_ids_none(self): self.assertIsNone(extract_topic_id(msg)) +class TestServiceActionType(unittest.TestCase): + """Test the shared service_action_type class-name normalizer.""" + + def test_topic_create(self): + self.assertEqual(service_action_type(MessageActionTopicCreate(title="x", icon_color=0)), "topic_create") + + def test_topic_edit(self): + self.assertEqual(service_action_type(MessageActionTopicEdit(title="x")), "topic_edit") + + def test_multi_word_chat_edit_title(self): + self.assertEqual(service_action_type(MessageActionChatEditTitle(title="x")), "chat_edit_title") + + def test_no_argument_action(self): + self.assertEqual(service_action_type(MessageActionPinMessage()), "pin_message") + + def test_acronym_run_splits_letter_by_letter(self): + """Documents the known cosmetic edge: consecutive capitals split. + + No title-bearing action we consume hits this; the tag is a stable, + unparsed identifier, so the behavior is intentional and pinned here. + """ + + class MessageActionSetMessagesTTL: + pass + + self.assertEqual(service_action_type(MessageActionSetMessagesTTL()), "set_messages_t_t_l") + + class TestExtractForwardFromId(unittest.TestCase): """Test _extract_forward_from_id for different Peer types.""" @@ -1266,6 +1296,24 @@ def test_regular_message_has_no_service_metadata(self): self.assertNotIn("action_type", result["raw_data"]) self.assertNotIn("new_title", result["raw_data"]) + def test_chat_edit_title_action_stored_in_raw_data(self): + """Non-topic action: a group rename stores a multi-word action_type.""" + msg = self._make_message(12, text=None) + msg.action = MessageActionChatEditTitle(title="New Group Name") + result = self._run(self.backup._process_message(msg, 100)) + self.assertEqual(result["raw_data"]["service_type"], "service") + self.assertEqual(result["raw_data"]["action_type"], "chat_edit_title") + self.assertEqual(result["raw_data"]["new_title"], "New Group Name") + + def test_pin_message_action_has_no_new_title(self): + """An action without a title stores action_type but no new_title.""" + msg = self._make_message(13, text=None) + msg.action = MessageActionPinMessage() + result = self._run(self.backup._process_message(msg, 100)) + self.assertEqual(result["raw_data"]["service_type"], "service") + self.assertEqual(result["raw_data"]["action_type"], "pin_message") + self.assertNotIn("new_title", result["raw_data"]) + def test_none_text_becomes_empty_string(self): """Message with None text stores empty string.""" msg = self._make_message(7, text=None)