Skip to content
Draft
9 changes: 6 additions & 3 deletions src/chat_sdk/adapters/discord/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from chat_sdk.adapters.discord.cards import (
card_to_discord_payload,
card_to_fallback_text,
)
from chat_sdk.adapters.discord.format_converter import DiscordFormatConverter
from chat_sdk.adapters.discord.types import (
Expand Down Expand Up @@ -747,7 +746,8 @@ async def post_message(
card_payload = card_to_discord_payload(card)
embeds.extend(card_payload["embeds"])
components.extend(card_payload["components"])
payload["content"] = self._truncate_content(card_to_fallback_text(card))
# Don't include text — Discord renders both `content` and the card
# embed if `content` is set, so cards would post duplicate text.
else:
payload["content"] = self._truncate_content(
convert_emoji_placeholders(
Expand Down Expand Up @@ -848,7 +848,10 @@ async def edit_message(
card_payload = card_to_discord_payload(card)
embeds.extend(card_payload["embeds"])
components.extend(card_payload["components"])
payload["content"] = self._truncate_content(card_to_fallback_text(card))
# Clear content explicitly so leftover text from a previous edit
# doesn't render alongside the card. Discord PATCH preserves
# omitted fields, so we must send "" rather than skip the key.
payload["content"] = ""
else:
payload["content"] = self._truncate_content(
convert_emoji_placeholders(
Expand Down
214 changes: 208 additions & 6 deletions src/chat_sdk/adapters/slack/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,11 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
_CHANNEL_CACHE_TTL_MS = 8 * 24 * 60 * 60 * 1000
_REVERSE_INDEX_TTL_MS = 8 * 24 * 60 * 60 * 1000

# Ignored message subtypes (system/meta events)
# Ignored message subtypes (system/meta events).
# `message_changed` is NOT in this set — it is routed to
# `_handle_message_changed` so we can capture link unfurl metadata.
_IGNORED_SUBTYPES = frozenset(
{
"message_changed",
"message_deleted",
"message_replied",
"channel_join",
Expand All @@ -147,6 +148,15 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
}
)

# Link-unfurl wait window: Slack delivers unfurled attachments via a
# separate `message_changed` event ~100-2000ms after the original. We
# poll briefly so the message handler sees enriched links instead of
# bare URLs.
# NOTE: `_TRAILING_SLASH_PATTERN` is not part of the wait window — it
# normalizes unfurl URL keys (strips a single trailing "/") so lookups
# match a URL with or without its trailing slash.
_TRAILING_SLASH_PATTERN = re.compile(r"/$")
_UNFURL_WAIT_MS = 2000
_UNFURL_POLL_MS = 150
_UNFURL_CACHE_TTL_MS = 60 * 60 * 1000  # 1 hour


# ---------------------------------------------------------------------------
# Helpers
Expand Down Expand Up @@ -1223,6 +1233,9 @@ def _handle_message_event(self, event: dict[str, Any], options: WebhookOptions |
return

subtype = event.get("subtype")
if subtype == "message_changed":
self._handle_message_changed(event, options)
return
if subtype and subtype in _IGNORED_SUBTYPES:
self._logger.debug("Ignoring message subtype", {"subtype": subtype})
return
Expand Down Expand Up @@ -1676,7 +1689,13 @@ async def _resolve_message_mentions(
# ==================================================================

def _extract_links(self, event: dict[str, Any]) -> list[LinkPreview]:
"""Extract link URLs from a Slack event."""
"""Extract link URLs from a Slack event.

Also merges any inline unfurl metadata that Slack already attached to
this same event (legacy ``attachments`` array). Cross-event unfurl
metadata (delivered later via ``message_changed``) is merged
asynchronously via :meth:`_enrich_links`.
"""
urls: set[str] = set()

for block in event.get("blocks", []):
Expand All @@ -1692,7 +1711,176 @@ def _extract_links(self, event: dict[str, Any]) -> list[LinkPreview]:
pipe_idx = raw.find("|")
urls.add(raw[:pipe_idx] if pipe_idx >= 0 else raw)

return [self._create_link_preview(url) for url in urls]
# Build unfurl metadata index from inline (same-event) attachments.
unfurls: dict[str, dict[str, str | None]] = {}
for att in event.get("attachments") or []:
if not isinstance(att, dict):
continue
att_url = att.get("from_url") or att.get("original_url")
if att_url and (att.get("title") or att.get("text")):
unfurls[att_url] = {
"title": att.get("title"),
"description": att.get("text"),
"image_url": att.get("image_url") or att.get("thumb_url"),
"site_name": att.get("service_name"),
}
urls.add(att_url)

previews: list[LinkPreview] = []
for url in urls:
preview = self._create_link_preview(url)
# TS uses ``url.replace(TRAILING_SLASH_PATTERN, "")`` (no ``g``
# flag) which strips a single trailing ``/``. Python's
# ``re.sub`` defaults to replacing all occurrences, so we
# pin ``count=1`` for parity. (Practically the regex anchors
# at end-of-string so only one match exists, but locking
# this in prevents drift if the pattern ever loosens.)
unfurl = (
unfurls.get(url) or unfurls.get(_TRAILING_SLASH_PATTERN.sub("", url, count=1)) or unfurls.get(f"{url}/")
)
if unfurl:
preview = self._merge_unfurl_into_preview(preview, unfurl)
previews.append(preview)
return previews

@staticmethod
def _merge_unfurl_into_preview(preview: LinkPreview, unfurl: dict[str, str | None]) -> LinkPreview:
    """Return a copy of *preview* with unfurl metadata layered on top.

    Mirrors the TS spread ``{ ...preview, ...unfurl }``: any non-``None``
    unfurl value overrides the preview's ``title`` / ``description`` /
    ``image_url`` / ``site_name`` (the unfurled attachment is treated as
    the authoritative source). ``fetch_message`` never appears on an
    unfurl and is always carried over from the preview unchanged. Note
    that ``_enrich_links`` skips merging entirely when a preview already
    has a title, so the unfurl title only wins on the same-event
    ``_extract_links`` path.
    """

    def _pick(key: str, current: str | None) -> str | None:
        # Unfurl value wins only when it is actually present (non-None).
        candidate = unfurl.get(key)
        return current if candidate is None else candidate

    return LinkPreview(
        url=preview.url,
        title=_pick("title", preview.title),
        description=_pick("description", preview.description),
        image_url=_pick("image_url", preview.image_url),
        site_name=_pick("site_name", preview.site_name),
        fetch_message=preview.fetch_message,
    )

def _handle_message_changed(self, event: dict[str, Any], _options: WebhookOptions | None = None) -> None:
"""Cache unfurl metadata from ``message_changed`` events.

Slack delivers link unfurls asynchronously by editing the original
message and dispatching ``message_changed``. We extract any unfurl
attachments and store them keyed by the inner message ``ts`` so
:meth:`_enrich_links` can pick them up for the original event.
"""
inner = event.get("message")
channel = event.get("channel")
if not (inner and channel and isinstance(inner, dict)):
return

attachments = inner.get("attachments") or []
has_unfurls = any(
isinstance(att, dict) and (att.get("from_url") or att.get("original_url")) for att in attachments
)
if not has_unfurls:
self._logger.debug("Ignoring message_changed without unfurl data")
return

ts = inner.get("ts")
if not (self._chat and ts):
return

self._logger.debug(
"Processing message_changed for link unfurls",
{"channel": channel, "ts": ts, "attachmentCount": len(attachments)},
)

unfurls: dict[str, dict[str, str | None]] = {}
for att in attachments:
if not isinstance(att, dict):
continue
att_url = att.get("from_url") or att.get("original_url")
if att_url and (att.get("title") or att.get("text")):
unfurls[att_url] = {
"title": att.get("title"),
"description": att.get("text"),
"image_url": att.get("image_url") or att.get("thumb_url"),
"site_name": att.get("service_name"),
}

if not unfurls:
return

async def _store() -> None:
try:
await self._chat.get_state().set( # type: ignore[union-attr]
f"slack:unfurls:{ts}",
unfurls,
_UNFURL_CACHE_TTL_MS,
)
except Exception as exc:
self._logger.error("Failed to cache unfurl metadata", {"error": exc})

try:
task = asyncio.get_running_loop().create_task(_store())
task.add_done_callback(
lambda t: (
self._logger.error("Unfurl cache task failed", {"error": t.exception()}) if t.exception() else None
)
)
except RuntimeError:
# No running loop (sync test context) — skip silently.
self._logger.debug("No running loop; skipping unfurl cache write")

async def _enrich_links(self, links: list[LinkPreview], message_ts: str | None) -> list[LinkPreview]:
"""Enrich ``links`` with unfurl metadata from a ``message_changed`` cache.

Polls the state cache for up to ``_UNFURL_WAIT_MS`` to give Slack
time to deliver the cross-event ``message_changed`` payload.
Returns the original list (untouched) when there is nothing to wait
for.
"""
if not (self._chat and message_ts) or not links:
return links

all_have_metadata = all((link.title is not None) or (link.fetch_message is not None) for link in links)
if all_have_metadata:
return links

deadline = time.monotonic() + (_UNFURL_WAIT_MS / 1000.0)
state = self._chat.get_state()
stored: dict[str, dict[str, str | None]] | None = None
while True:
try:
stored = await state.get(f"slack:unfurls:{message_ts}")
except Exception as exc:
self._logger.warn(
"Failed to read unfurl data from state",
{"error": str(exc), "message_ts": message_ts},
)
return links
if stored or time.monotonic() >= deadline:
break
await asyncio.sleep(_UNFURL_POLL_MS / 1000.0)

if not stored:
return links

out: list[LinkPreview] = []
for link in links:
if link.title is not None:
out.append(link)
continue
unfurl = (
stored.get(link.url)
or stored.get(_TRAILING_SLASH_PATTERN.sub("", link.url, count=1))
or stored.get(f"{link.url}/")
)
if unfurl:
out.append(self._merge_unfurl_into_preview(link, unfurl))
else:
out.append(link)
return out

def _create_link_preview(self, url: str) -> LinkPreview:
"""Create a LinkPreview for a URL.
Expand Down Expand Up @@ -1798,7 +1986,13 @@ async def _parse_slack_message(
self._create_attachment(f, team_id=event.get("team") or event.get("team_id"))
for f in event.get("files", [])
],
links=self._extract_links(event),
# ``_enrich_links`` polls the unfurl cache for up to
# ``_UNFURL_WAIT_MS`` (2000 ms) before giving up, so every
# message containing a not-yet-unfurled link adds up to
# ~2s of latency to message handling worst-case (it returns
# immediately when the cache is already populated or when
# there are no links to enrich).
links=await self._enrich_links(self._extract_links(event), event.get("ts")),
)

def _parse_slack_message_sync(self, event: dict[str, Any], thread_id: str) -> Message:
Expand Down Expand Up @@ -2302,7 +2496,15 @@ async def stream(

decoded = self.decode_thread_id(thread_id)
channel = decoded.channel
thread_ts = decoded.thread_ts
# Normalize empty thread_ts to None to avoid Slack API "invalid_thread_ts" errors.
# Stream requires a real thread context — bail out when missing.
thread_ts = decoded.thread_ts or None
if not thread_ts:
self._logger.debug("Slack: stream skipped - no thread context")
raise ValidationError(
"slack",
"Slack streaming requires a valid thread context (non-empty thread_ts)",
)
self._logger.debug("Slack: starting stream", {"channel": channel, "threadTs": thread_ts})

token = self._get_token()
Expand Down
10 changes: 8 additions & 2 deletions src/chat_sdk/adapters/slack/format_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
table_to_ascii,
)

# Match bare @mentions (e.g. "@george") to rewrite as Slack's `<@george>`.
# The lookbehind excludes `<` (already-formatted mentions like `<@U123>`) and
# any word character, so email addresses like `user@example.com` and
# `<mailto:foo@bar.com>` links pass through unchanged.
# Group 1 captures the mention name that the replacement wraps in `<@...>`.
_BARE_MENTION_REGEX = re.compile(r"(?<![<\w])@(\w+)")


class SlackFormatConverter(BaseFormatConverter):
"""Convert between Slack mrkdwn and standard markdown / plain text."""
Expand Down Expand Up @@ -200,7 +206,7 @@ def flush_text() -> None:

def _convert_mentions_to_slack(self, text: str) -> str:
    """Rewrite bare ``@name`` mentions in *text* as Slack's ``<@name>``."""
    return _BARE_MENTION_REGEX.sub(lambda match: f"<@{match.group(1)}>", text)

def _node_to_mrkdwn(self, node: Content) -> str:
"""Convert a single AST node to Slack mrkdwn."""
Expand All @@ -215,7 +221,7 @@ def _node_to_mrkdwn(self, node: Content) -> str:

if node_type == "text":
value = node.get("value", "")
return re.sub(r"(?<!<)@(\w+)", r"<@\1>", value)
return _BARE_MENTION_REGEX.sub(r"<@\1>", value)

if node_type == "strong":
content = "".join(self._node_to_mrkdwn(c) for c in children)
Expand Down
Loading
Loading