Skip to content

Commit e35a26b

Browse files
committed
fix(platform):address-dedup-review-feedback-for-qqofficial
1 parent 80f8f88 commit e35a26b

File tree

3 files changed

+113
-88
lines changed

3 files changed

+113
-88
lines changed

astrbot/core/event_bus.py

Lines changed: 6 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
"""
1212

1313
import asyncio
14-
import hashlib
1514
from asyncio import Queue
1615

1716
from astrbot.core import logger
1817
from astrbot.core.astrbot_config_mgr import AstrBotConfigManager
19-
from astrbot.core.message.utils import build_component_dedup_signature
18+
from astrbot.core.message.utils import (
19+
build_event_content_dedup_key,
20+
build_event_message_id_dedup_key,
21+
)
2022
from astrbot.core.pipeline.scheduler import PipelineScheduler
2123
from astrbot.core.utils.number_utils import safe_positive_float
2224
from astrbot.core.utils.ttl_registry import TTLKeyRegistry
@@ -31,53 +33,9 @@ class EventDeduplicator:
3133
and message ID, with configurable TTL window.
3234
"""
3335

34-
_MAX_RAW_TEXT_FINGERPRINT_LEN = 256
35-
3636
def __init__(self, ttl_seconds: float = 0.5) -> None:
3737
self._registry = TTLKeyRegistry(ttl_seconds)
3838

39-
def _build_attachment_signature(self, event: AstrMessageEvent) -> str:
40-
"""Build attachment signature for deduplication."""
41-
return build_component_dedup_signature(event.get_messages())
42-
43-
def _build_content_key(self, event: AstrMessageEvent) -> str:
44-
"""Build content-based deduplication key."""
45-
msg_text = (event.get_message_str() or "").strip()
46-
if len(msg_text) <= self._MAX_RAW_TEXT_FINGERPRINT_LEN:
47-
msg_sig = msg_text
48-
else:
49-
msg_hash = hashlib.sha1(msg_text.encode("utf-8")).hexdigest()[:16]
50-
msg_sig = f"h:{len(msg_text)}:{msg_hash}"
51-
52-
attach_sig = self._build_attachment_signature(event)
53-
return "|".join([
54-
"content",
55-
event.get_platform_id() or "",
56-
event.unified_msg_origin or "",
57-
event.get_sender_id() or "",
58-
msg_sig,
59-
attach_sig,
60-
])
61-
62-
def _build_message_id_key(self, event: AstrMessageEvent) -> str | None:
63-
"""Build message ID-based deduplication key.
64-
65-
Falls back to message_obj.id if message_id is not available.
66-
"""
67-
# Try message_id first
68-
message_id = str(getattr(event.message_obj, "message_id", "") or "")
69-
# Fallback to id if message_id is not available
70-
if not message_id:
71-
message_id = str(getattr(event.message_obj, "id", "") or "")
72-
if not message_id:
73-
return None
74-
return "|".join([
75-
"message_id",
76-
event.get_platform_id() or "",
77-
event.unified_msg_origin or "",
78-
message_id,
79-
])
80-
8139
def is_duplicate(self, event: AstrMessageEvent) -> bool:
8240
"""Check if the event is a duplicate.
8341
@@ -89,7 +47,7 @@ def is_duplicate(self, event: AstrMessageEvent) -> bool:
8947
return False
9048

9149
# Short-circuit: check message_id first (cheap) before computing full content key (expensive)
92-
message_id_key = self._build_message_id_key(event)
50+
message_id_key = build_event_message_id_dedup_key(event)
9351
if message_id_key is not None:
9452
if self._registry.contains(message_id_key):
9553
logger.debug(
@@ -102,7 +60,7 @@ def is_duplicate(self, event: AstrMessageEvent) -> bool:
10260
self._registry.add(message_id_key)
10361

10462
# Only compute full content key if we get past message_id check
105-
content_key = self._build_content_key(event)
63+
content_key = build_event_content_dedup_key(event)
10664
if self._registry.contains(content_key):
10765
logger.debug(
10866
"Skip duplicate event in event_bus (by content): umo=%s, sender=%s",

astrbot/core/message/utils.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
"""Message utilities for deduplication and component handling."""
22

33
import hashlib
4-
from typing import Iterable
4+
from collections.abc import Iterable
5+
from typing import TYPE_CHECKING
56

67
from astrbot.core.message.components import BaseMessageComponent, File, Image
78

9+
if TYPE_CHECKING:
10+
from astrbot.core.platform import AstrMessageEvent
11+
12+
13+
_MAX_RAW_TEXT_FINGERPRINT_LEN = 256
14+
815

916
def build_component_dedup_signature(
1017
components: Iterable[BaseMessageComponent],
@@ -40,3 +47,56 @@ def build_component_dedup_signature(
4047

4148
payload = "|".join(parts)
4249
return hashlib.sha1(payload.encode("utf-8")).hexdigest()[:16]
50+
51+
52+
def build_sender_content_dedup_key(content: str, sender_id: str) -> str | None:
53+
"""Build a sender+content hash key for short-window deduplication."""
54+
if not (content and sender_id):
55+
return None
56+
content_hash = hashlib.sha1(content.encode("utf-8")).hexdigest()[:16]
57+
return f"{sender_id}:{content_hash}"
58+
59+
60+
def build_event_content_dedup_key(event: "AstrMessageEvent") -> str:
    """Build a content fingerprint key for EventBus deduplication.

    The key is the ``|``-joined sequence of: the literal ``"content"``, the
    platform id, the unified message origin, the sender id, a text signature
    and an attachment signature.

    Args:
        event: The message event to fingerprint.

    Returns:
        The deduplication key. Falsy platform id, origin or sender id
        contribute an empty field.
    """
    msg_text = str(event.get_message_str() or "").strip()
    if len(msg_text) <= _MAX_RAW_TEXT_FINGERPRINT_LEN:
        # Short text is embedded verbatim in the key.
        msg_sig = msg_text
    else:
        # Long text is replaced by its length plus a truncated SHA-1 digest.
        # "surrogatepass" prevents UnicodeEncodeError when the text contains
        # lone surrogates, while producing the same bytes as a strict encode
        # for well-formed text.
        msg_bytes = msg_text.encode("utf-8", errors="surrogatepass")
        msg_hash = hashlib.sha1(msg_bytes).hexdigest()[:16]
        msg_sig = f"h:{len(msg_text)}:{msg_hash}"

    attach_sig = build_component_dedup_signature(event.get_messages())
    platform_id = str(event.get_platform_id() or "")
    unified_msg_origin = str(event.unified_msg_origin or "")
    sender_id = str(event.get_sender_id() or "")
    return "|".join(
        [
            "content",
            platform_id,
            unified_msg_origin,
            sender_id,
            msg_sig,
            attach_sig,
        ]
    )
83+
84+
85+
def build_event_message_id_dedup_key(event: "AstrMessageEvent") -> str | None:
86+
"""Build a message_id fingerprint key for EventBus deduplication."""
87+
message_id = str(getattr(event.message_obj, "message_id", "") or "")
88+
if not message_id:
89+
message_id = str(getattr(event.message_obj, "id", "") or "")
90+
if not message_id:
91+
return None
92+
93+
platform_id = str(event.get_platform_id() or "")
94+
unified_msg_origin = str(event.unified_msg_origin or "")
95+
return "|".join(
96+
[
97+
"message_id",
98+
platform_id,
99+
unified_msg_origin,
100+
message_id,
101+
]
102+
)

astrbot/core/platform/sources/qqofficial/qqofficial_platform_adapter.py

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import asyncio
4-
import hashlib
54
import logging
65
import os
76
import random
@@ -23,6 +22,7 @@
2322
PlatformMetadata,
2423
)
2524
from astrbot.core.message.components import BaseMessageComponent
25+
from astrbot.core.message.utils import build_sender_content_dedup_key
2626
from astrbot.core.platform.astr_message_event import MessageSesion
2727
from astrbot.core.utils.number_utils import safe_positive_float
2828
from astrbot.core.utils.ttl_registry import TTLKeyRegistry
@@ -55,13 +55,22 @@ def _extract_sender_id(message) -> str:
5555
Returns:
5656
The sender ID as a string, or empty string if not found.
5757
"""
58-
if hasattr(message, "author") and hasattr(message.author, "user_openid"):
59-
return str(message.author.user_openid)
60-
if hasattr(message, "author") and hasattr(message.author, "member_openid"):
61-
return str(message.author.member_openid)
62-
if hasattr(message, "author") and hasattr(message.author, "id"):
63-
return str(message.author.id)
64-
return ""
58+
author = getattr(message, "author", None)
59+
if not author:
60+
return ""
61+
62+
sender_id = (
63+
getattr(author, "user_openid", None)
64+
or getattr(author, "member_openid", None)
65+
or getattr(author, "id", None)
66+
)
67+
if sender_id is None:
68+
return ""
69+
70+
sender_id_str = str(sender_id).strip()
71+
if not sender_id_str:
72+
return ""
73+
return sender_id_str
6574

6675

6776
class MessageDeduplicator:
@@ -82,10 +91,7 @@ def __init__(
8291
self._lock = asyncio.Lock()
8392

8493
def _build_content_key(self, content: str, sender_id: str) -> str | None:
85-
if not (content and sender_id):
86-
return None
87-
content_hash = hashlib.sha1(content.encode("utf-8")).hexdigest()[:16]
88-
return f"{sender_id}:{content_hash}"
94+
return build_sender_content_dedup_key(content, sender_id)
8995

9096
async def is_duplicate(
9197
self,
@@ -94,24 +100,35 @@ async def is_duplicate(
94100
sender_id: str = "",
95101
) -> bool:
96102
async with self._lock:
97-
# Bypass deduplication if TTL is 0 (disabled)
98-
if self._message_ids.ttl_seconds == 0:
103+
id_dedup_enabled = self._message_ids.ttl_seconds > 0 and bool(message_id)
104+
content_dedup_enabled = self._content_keys.ttl_seconds > 0
105+
106+
if not id_dedup_enabled and not content_dedup_enabled:
99107
return False
100108

101109
# 1) ID-based dedup
102-
if self._message_ids.contains(message_id):
103-
logger.debug(
104-
"[QQOfficial] Duplicate message detected (by ID): %s...",
105-
message_id[:50],
106-
)
107-
return True
110+
if id_dedup_enabled:
111+
if self._message_ids.contains(message_id):
112+
logger.debug(
113+
"[QQOfficial] Duplicate message detected (by ID): %s...",
114+
message_id[:50],
115+
)
116+
return True
108117

109-
self._message_ids.add(message_id)
118+
self._message_ids.add(message_id)
110119

111120
# 2) Content-based dedup
121+
if not content_dedup_enabled:
122+
logger.debug(
123+
"[QQOfficial] New message registered: %s...", message_id[:50]
124+
)
125+
return False
126+
112127
content_key = self._build_content_key(content, sender_id)
113128
if content_key is None:
114-
logger.debug("[QQOfficial] New message registered: %s...", message_id[:50])
129+
logger.debug(
130+
"[QQOfficial] New message registered: %s...", message_id[:50]
131+
)
115132
return False
116133

117134
if self._content_keys.contains(content_key):
@@ -120,7 +137,8 @@ async def is_duplicate(
120137
content_key,
121138
)
122139
# Preserve existing behavior: do not keep message_id on content duplicates
123-
self._message_ids.discard(message_id)
140+
if id_dedup_enabled:
141+
self._message_ids.discard(message_id)
124142
return True
125143

126144
self._content_keys.add(content_key)
@@ -132,21 +150,9 @@ class botClient(Client):
132150
def set_platform(self, platform: QQOfficialPlatformAdapter) -> None:
133151
self.platform = platform
134152

135-
def _get_sender_id(self, message) -> str:
136-
"""Extract sender ID from different message types.
137-
138-
Delegates to the centralized _extract_sender_id function to avoid
139-
precedence drift.
140-
"""
141-
return _extract_sender_id(message)
142-
143-
def _extract_dedup_key(self, message) -> tuple[str, str]:
144-
sender_id = self._get_sender_id(message)
145-
content = getattr(message, "content", "") or ""
146-
return sender_id, content
147-
148153
async def _should_drop_message(self, message) -> bool:
149-
sender_id, content = self._extract_dedup_key(message)
154+
sender_id = _extract_sender_id(message)
155+
content = getattr(message, "content", "") or ""
150156
return await self.platform._is_duplicate_message(message.id, content, sender_id)
151157

152158
# 收到群消息
@@ -596,11 +602,12 @@ def _parse_from_qqofficial(
596602
message,
597603
botpy.message.C2CMessage,
598604
):
605+
sender_user_id = _extract_sender_id(message)
599606
if isinstance(message, botpy.message.GroupMessage):
600-
abm.sender = MessageMember(message.author.member_openid, "")
607+
abm.sender = MessageMember(sender_user_id, "")
601608
abm.group_id = message.group_openid
602609
else:
603-
abm.sender = MessageMember(message.author.user_openid, "")
610+
abm.sender = MessageMember(sender_user_id, "")
604611
# Parse face messages to readable text
605612
abm.message_str = QQOfficialPlatformAdapter._parse_face_message(
606613
message.content.strip()

0 commit comments

Comments
 (0)