@@ -122,10 +122,11 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
# Both TTLs are 8 days expressed in milliseconds
# (8 days * 24 h * 60 min * 60 s * 1000 ms).
# NOTE(review): the names suggest a channel-info cache and a reverse-lookup
# index; confirm against the call sites, which are outside this view.
_CHANNEL_CACHE_TTL_MS = 8 * 24 * 60 * 60 * 1000
_REVERSE_INDEX_TTL_MS = 8 * 24 * 60 * 60 * 1000
124124
125- # Ignored message subtypes (system/meta events)
125+ # Ignored message subtypes (system/meta events).
126+ # `message_changed` is NOT in this set — it is routed to
127+ # `_handle_message_changed` so we can capture link unfurl metadata.
126128_IGNORED_SUBTYPES = frozenset (
127129 {
128- "message_changed" ,
129130 "message_deleted" ,
130131 "message_replied" ,
131132 "channel_join" ,
@@ -147,6 +148,15 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
147148 }
148149)
149150
# Matches a single trailing "/" so unfurl metadata can be looked up for a URL
# both with and without its trailing slash.
_TRAILING_SLASH_PATTERN = re.compile(r"/$")

# Link-unfurl wait window: Slack delivers unfurled attachments via a
# separate `message_changed` event ~100-2000ms after the original. We
# poll briefly so the message handler sees enriched links instead of
# bare URLs.
_UNFURL_WAIT_MS = 2000  # upper bound on the time spent polling for unfurl data
_UNFURL_POLL_MS = 150  # delay between two consecutive cache polls
_UNFURL_CACHE_TTL_MS = 60 * 60 * 1000  # 1 hour; TTL of cached unfurl metadata
159+
150160
151161# ---------------------------------------------------------------------------
152162# Helpers
@@ -1223,6 +1233,9 @@ def _handle_message_event(self, event: dict[str, Any], options: WebhookOptions |
12231233 return
12241234
12251235 subtype = event .get ("subtype" )
1236+ if subtype == "message_changed" :
1237+ self ._handle_message_changed (event , options )
1238+ return
12261239 if subtype and subtype in _IGNORED_SUBTYPES :
12271240 self ._logger .debug ("Ignoring message subtype" , {"subtype" : subtype })
12281241 return
@@ -1676,7 +1689,13 @@ async def _resolve_message_mentions(
16761689 # ==================================================================
16771690
16781691 def _extract_links (self , event : dict [str , Any ]) -> list [LinkPreview ]:
1679- """Extract link URLs from a Slack event."""
1692+ """Extract link URLs from a Slack event.
1693+
1694+ Also merges any inline unfurl metadata that Slack already attached to
1695+ this same event (legacy ``attachments`` array). Cross-event unfurl
1696+ metadata (delivered later via ``message_changed``) is merged
1697+ asynchronously via :meth:`_enrich_links`.
1698+ """
16801699 urls : set [str ] = set ()
16811700
16821701 for block in event .get ("blocks" , []):
@@ -1692,7 +1711,159 @@ def _extract_links(self, event: dict[str, Any]) -> list[LinkPreview]:
16921711 pipe_idx = raw .find ("|" )
16931712 urls .add (raw [:pipe_idx ] if pipe_idx >= 0 else raw )
16941713
1695- return [self ._create_link_preview (url ) for url in urls ]
1714+ # Build unfurl metadata index from inline (same-event) attachments.
1715+ unfurls : dict [str , dict [str , str | None ]] = {}
1716+ for att in event .get ("attachments" ) or []:
1717+ if not isinstance (att , dict ):
1718+ continue
1719+ att_url = att .get ("from_url" ) or att .get ("original_url" )
1720+ if att_url and (att .get ("title" ) or att .get ("text" )):
1721+ unfurls [att_url ] = {
1722+ "title" : att .get ("title" ),
1723+ "description" : att .get ("text" ),
1724+ "image_url" : att .get ("image_url" ) or att .get ("thumb_url" ),
1725+ "site_name" : att .get ("service_name" ),
1726+ }
1727+ urls .add (att_url )
1728+
1729+ previews : list [LinkPreview ] = []
1730+ for url in urls :
1731+ preview = self ._create_link_preview (url )
1732+ unfurl = unfurls .get (url ) or unfurls .get (_TRAILING_SLASH_PATTERN .sub ("" , url )) or unfurls .get (f"{ url } /" )
1733+ if unfurl :
1734+ preview = self ._merge_unfurl_into_preview (preview , unfurl )
1735+ previews .append (preview )
1736+ return previews
1737+
@staticmethod
def _merge_unfurl_into_preview(preview: LinkPreview, unfurl: dict[str, str | None]) -> LinkPreview:
    """Build a new ``LinkPreview`` by filling gaps in ``preview`` from ``unfurl``.

    A field already set on ``preview`` (anything other than ``None``) is
    kept as-is; only fields that are ``None`` fall back to the matching
    ``unfurl`` entry. ``url`` and ``fetch_message`` are always copied from
    ``preview`` unchanged.

    Args:
        preview: The preview whose existing values take precedence.
        unfurl: Metadata with the optional keys ``title``, ``description``,
            ``image_url`` and ``site_name``.

    Returns:
        A freshly constructed ``LinkPreview``; ``preview`` is not mutated.
    """

    def _prefer_existing(current: str | None, key: str) -> str | None:
        # Consult the unfurl data only when the preview has no value yet.
        if current is None:
            return unfurl.get(key)
        return current

    merged_fields = {
        "title": _prefer_existing(preview.title, "title"),
        "description": _prefer_existing(preview.description, "description"),
        "image_url": _prefer_existing(preview.image_url, "image_url"),
        "site_name": _prefer_existing(preview.site_name, "site_name"),
    }
    return LinkPreview(url=preview.url, fetch_message=preview.fetch_message, **merged_fields)
1754+
def _handle_message_changed(self, event: dict[str, Any], _options: WebhookOptions | None = None) -> None:
    """Cache link-unfurl metadata carried by a ``message_changed`` event.

    Slack delivers link unfurls asynchronously by editing the original
    message and dispatching ``message_changed``. Unfurl attachments found
    on the edited message are stored in the chat state under
    ``slack:unfurls:<ts>`` (``ts`` of the inner message) so
    :meth:`_enrich_links` can pick them up for the original event.

    The write is scheduled as a background task on the running event loop
    and is best-effort: failures are logged, never raised. When no loop is
    running the write is skipped.

    Args:
        event: Raw Slack event. The edited message is read from
            ``event["message"]`` and the channel from ``event["channel"]``.
        _options: Unused.
    """
    inner = event.get("message")
    channel = event.get("channel")
    if not (inner and channel and isinstance(inner, dict)):
        return

    attachments = inner.get("attachments") or []
    has_unfurls = any(
        isinstance(att, dict) and (att.get("from_url") or att.get("original_url")) for att in attachments
    )
    if not has_unfurls:
        self._logger.debug("Ignoring message_changed without unfurl data")
        return

    # Capture the chat handle once so the deferred write below does not
    # depend on ``self._chat`` still being set when the task finally runs.
    chat = self._chat
    ts = inner.get("ts")
    if not (chat and ts):
        return

    self._logger.debug(
        "Processing message_changed for link unfurls",
        {"channel": channel, "ts": ts, "attachmentCount": len(attachments)},
    )

    # Index usable unfurls by URL. An attachment only counts when it has a
    # source URL and at least a title or a text body.
    unfurls: dict[str, dict[str, str | None]] = {}
    for att in attachments:
        if not isinstance(att, dict):
            continue
        att_url = att.get("from_url") or att.get("original_url")
        if att_url and (att.get("title") or att.get("text")):
            unfurls[att_url] = {
                "title": att.get("title"),
                "description": att.get("text"),
                "image_url": att.get("image_url") or att.get("thumb_url"),
                "site_name": att.get("service_name"),
            }

    if not unfurls:
        return

    async def _store() -> None:
        try:
            await chat.get_state().set(
                f"slack:unfurls:{ts}",
                unfurls,
                _UNFURL_CACHE_TTL_MS,
            )
        except Exception as exc:
            self._logger.error("Failed to cache unfurl metadata", {"error": exc})

    def _log_task_failure(done: asyncio.Task[None]) -> None:
        # ``Task.exception()`` raises CancelledError on a cancelled task, so
        # cancellation has to be ruled out before asking for the exception.
        if done.cancelled():
            return
        exc = done.exception()
        if exc is not None:
            self._logger.error("Unfurl cache task failed", {"error": exc})

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop (sync test context) — skip silently.
        self._logger.debug("No running loop; skipping unfurl cache write")
        return

    task = loop.create_task(_store())
    # The event loop only keeps weak references to tasks; without a strong
    # reference the write could be garbage-collected before it completes.
    # NOTE(review): relies on the module-level ``_pin_task`` helper holding
    # the task until it is done — confirm against its definition.
    _pin_task(task)
    task.add_done_callback(_log_task_failure)
async def _enrich_links(self, links: list[LinkPreview], message_ts: str | None) -> list[LinkPreview]:
    """Enrich ``links`` with unfurl metadata cached from ``message_changed``.

    Polls the chat state key ``slack:unfurls:<message_ts>`` every
    ``_UNFURL_POLL_MS`` for at most ``_UNFURL_WAIT_MS`` to give Slack time
    to deliver the cross-event ``message_changed`` payload.

    Args:
        links: Previews extracted from the original event.
        message_ts: ``ts`` of the original message; the cache key suffix.

    Returns:
        The original ``links`` object, untouched, when there is nothing to
        wait for, when the cache read fails, or when no metadata arrives in
        time. Otherwise a new list in which links without a title have the
        cached metadata merged in.
    """
    if not (self._chat and message_ts) or not links:
        return links

    # Nothing to wait for when every link already carries a title or a
    # ``fetch_message`` value.
    if all((link.title is not None) or (link.fetch_message is not None) for link in links):
        return links

    cache_key = f"slack:unfurls:{message_ts}"
    poll_interval = _UNFURL_POLL_MS / 1000.0
    deadline = time.monotonic() + (_UNFURL_WAIT_MS / 1000.0)
    state = self._chat.get_state()
    stored: dict[str, dict[str, str | None]] | None = None
    while True:
        try:
            stored = await state.get(cache_key)
        except Exception as exc:
            # Enrichment is best-effort: degrade to the bare links, but
            # leave a trace instead of hiding the state-store failure.
            self._logger.error("Failed to read unfurl cache", {"error": exc})
            return links
        if stored:
            break
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Clamp the final sleep so the total wait stays within the budget.
        await asyncio.sleep(min(poll_interval, remaining))

    if not stored:
        return links

    def _lookup(url: str) -> dict[str, str | None] | None:
        # Match the URL as written, then without and with a trailing slash.
        return (
            stored.get(url)
            or stored.get(_TRAILING_SLASH_PATTERN.sub("", url))
            or stored.get(f"{url}/")
        )

    enriched: list[LinkPreview] = []
    for link in links:
        # Links that already have a title are passed through unchanged.
        unfurl = None if link.title is not None else _lookup(link.url)
        enriched.append(self._merge_unfurl_into_preview(link, unfurl) if unfurl else link)
    return enriched
16961867
16971868 def _create_link_preview (self , url : str ) -> LinkPreview :
16981869 """Create a LinkPreview for a URL.
@@ -1798,7 +1969,7 @@ async def _parse_slack_message(
17981969 self ._create_attachment (f , team_id = event .get ("team" ) or event .get ("team_id" ))
17991970 for f in event .get ("files" , [])
18001971 ],
1801- links = self ._extract_links (event ),
1972+ links = await self ._enrich_links ( self . _extract_links (event ), event . get ( "ts" ) ),
18021973 )
18031974
18041975 def _parse_slack_message_sync (self , event : dict [str , Any ], thread_id : str ) -> Message :
@@ -2302,7 +2473,15 @@ async def stream(
23022473
23032474 decoded = self .decode_thread_id (thread_id )
23042475 channel = decoded .channel
2305- thread_ts = decoded .thread_ts
2476+ # Normalize empty thread_ts to None to avoid Slack API "invalid_thread_ts" errors.
2477+ # Stream requires a real thread context — bail out when missing.
2478+ thread_ts = decoded .thread_ts or None
2479+ if not thread_ts :
2480+ self ._logger .debug ("Slack: stream skipped - no thread context" )
2481+ raise ValidationError (
2482+ "slack" ,
2483+ "Slack streaming requires a valid thread context (non-empty thread_ts)" ,
2484+ )
23062485 self ._logger .debug ("Slack: starting stream" , {"channel" : channel , "threadTs" : thread_ts })
23072486
23082487 token = self ._get_token ()
0 commit comments