Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/message_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hashlib
import logging
import os
import re

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -287,3 +288,28 @@ def extract_topic_id(message: object) -> int | None:
if topic_id is None:
topic_id = getattr(message.reply_to, "reply_to_msg_id", None)
return topic_id


def service_action_type(action: object) -> str:
    """Derive a snake_case tag from the class name of a ``MessageAction``.

    The backup backfill path stores this tag under ``raw_data.action_type``
    to label service messages such as forum topic creations and renames.

    The ``MessageAction`` prefix is dropped, an underscore is inserted in
    front of every ASCII capital except a leading one, and the result is
    lower-cased:

    * ``MessageActionTopicCreate`` -> ``"topic_create"``
    * ``MessageActionTopicEdit`` -> ``"topic_edit"``
    * ``MessageActionChatEditTitle`` -> ``"chat_edit_title"``

    Runs of capitals (acronyms) are broken up one letter at a time, so
    ``MessageActionSetMessagesTTL`` becomes ``"set_messages_t_t_l"``. That is
    purely cosmetic: the tag is a stable, deterministic identifier that is
    never parsed back, and none of the title-bearing actions we care about
    contain such a run.

    These values deliberately do not match the live listener's curated,
    event-derived names (``title_changed``, ``user_joined``, ...). The
    backfill works from low-level ``MessageAction`` classes, whereas the
    listener works from high-level ``events.ChatAction`` flags; the two paths
    share the *shape* of ``raw_data`` but not the ``action_type`` *values*.
    """
    class_name = type(action).__name__
    stem = class_name.removeprefix("MessageAction")
    # Zero-width match in front of each capital, skipping the very start of
    # the string so the tag never begins with an underscore.
    underscored = re.sub(r"(?<!^)(?=[A-Z])", "_", stem)
    return underscored.lower()
26 changes: 9 additions & 17 deletions src/telegram_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import logging
import os
import random
import re
from datetime import UTC, datetime

from telethon import TelegramClient
Expand Down Expand Up @@ -44,6 +43,7 @@
finalize_atomic_download,
resolve_shared_file_path,
sanitize_media_filename,
service_action_type,
)
from .parallel_download import (
ParallelDownloader,
Expand Down Expand Up @@ -290,17 +290,6 @@ async def iter_messages_with_flood_retry(client, entity, *, min_id=0, **kwargs):
await asyncio.sleep(sleep_duration)


def _service_action_type(action: object) -> str:
    """Turn a Telethon MessageAction class name into a snake_case label.

    The leading ``MessageAction`` is removed, then every capital letter other
    than one at the start is prefixed with an underscore before lower-casing.

    Examples:
        MessageActionTopicCreate   -> "topic_create"
        MessageActionTopicEdit     -> "topic_edit"
        MessageActionChatEditTitle -> "chat_edit_title"
    """
    stripped = type(action).__name__.removeprefix("MessageAction")
    # The pattern is zero-width: it marks the gap before each capital except
    # at position 0, so substitution only inserts separators.
    separated = re.sub(r"(?<!^)(?=[A-Z])", "_", stripped)
    return separated.lower()


class TelegramBackup:
"""Main class for managing Telegram backups."""

Expand Down Expand Up @@ -1425,14 +1414,17 @@ async def _process_message(self, message: Message, chat_id: int) -> dict:
}

# Preserve service-action metadata (e.g. forum topic creations and
# renames) so historical backfills keep parity with the listener's
# raw_data convention (service_type / action_type, since v6.0.0).
# Without this, service events are stored without their payload and
# the information is irrecoverable once the history is archived.
# renames) so historical backfills carry the same raw_data *shape* as
# the live listener (service_type / action_type / new_title). The
# action_type *vocabulary* differs by design: the backfill derives it
# from low-level MessageAction class names (chat_edit_title, ...) while
# the listener uses curated event names (title_changed, ...) — only the
# keys are shared, not the values. Without this, service events are
# stored without their payload and are irrecoverable once archived.
action = getattr(message, "action", None)
if action is not None:
message_data["raw_data"]["service_type"] = "service"
message_data["raw_data"]["action_type"] = _service_action_type(action)
message_data["raw_data"]["action_type"] = service_action_type(action)
action_title = getattr(action, "title", None)
if action_title is not None:
message_data["raw_data"]["new_title"] = self._text_with_entities_to_string(action_title)
Expand Down
50 changes: 49 additions & 1 deletion tests/test_telegram_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from telethon.tl.types import (
Channel,
Chat,
MessageActionChatEditTitle,
MessageActionPinMessage,
MessageActionTopicCreate,
MessageActionTopicEdit,
MessageMediaContact,
Expand All @@ -23,7 +25,7 @@
User,
)

from src.message_utils import extract_topic_id
from src.message_utils import extract_topic_id, service_action_type
from src.telegram_backup import TelegramBackup


Expand Down Expand Up @@ -744,6 +746,34 @@ def test_returns_none_when_both_ids_none(self):
self.assertIsNone(extract_topic_id(msg))


class TestServiceActionType(unittest.TestCase):
    """Exercise the shared ``service_action_type`` class-name normalizer."""

    def test_topic_create(self):
        action = MessageActionTopicCreate(title="x", icon_color=0)
        self.assertEqual(service_action_type(action), "topic_create")

    def test_topic_edit(self):
        action = MessageActionTopicEdit(title="x")
        self.assertEqual(service_action_type(action), "topic_edit")

    def test_multi_word_chat_edit_title(self):
        action = MessageActionChatEditTitle(title="x")
        self.assertEqual(service_action_type(action), "chat_edit_title")

    def test_no_argument_action(self):
        action = MessageActionPinMessage()
        self.assertEqual(service_action_type(action), "pin_message")

    def test_acronym_run_splits_letter_by_letter(self):
        """Pin the known cosmetic edge case: a run of capitals is split per letter.

        None of the title-bearing actions we consume trigger it, and the tag
        is a stable identifier that is never parsed, so the behavior is
        intentional and locked in by this test.
        """

        # Local stand-in class: only its name matters to the normalizer.
        class MessageActionSetMessagesTTL:
            pass

        action = MessageActionSetMessagesTTL()
        self.assertEqual(service_action_type(action), "set_messages_t_t_l")


class TestExtractForwardFromId(unittest.TestCase):
"""Test _extract_forward_from_id for different Peer types."""

Expand Down Expand Up @@ -1266,6 +1296,24 @@ def test_regular_message_has_no_service_metadata(self):
self.assertNotIn("action_type", result["raw_data"])
self.assertNotIn("new_title", result["raw_data"])

def test_chat_edit_title_action_stored_in_raw_data(self):
    """A group rename (non-topic action) is stored with a multi-word action_type."""
    message = self._make_message(12, text=None)
    message.action = MessageActionChatEditTitle(title="New Group Name")
    processed = self._run(self.backup._process_message(message, 100))
    raw_data = processed["raw_data"]
    self.assertEqual(raw_data["service_type"], "service")
    self.assertEqual(raw_data["action_type"], "chat_edit_title")
    self.assertEqual(raw_data["new_title"], "New Group Name")

def test_pin_message_action_has_no_new_title(self):
    """A title-less action still records action_type but omits new_title."""
    message = self._make_message(13, text=None)
    message.action = MessageActionPinMessage()
    processed = self._run(self.backup._process_message(message, 100))
    raw_data = processed["raw_data"]
    self.assertEqual(raw_data["service_type"], "service")
    self.assertEqual(raw_data["action_type"], "pin_message")
    self.assertNotIn("new_title", raw_data)

def test_none_text_becomes_empty_string(self):
"""Message with None text stores empty string."""
msg = self._make_message(7, text=None)
Expand Down
Loading