@@ -122,10 +122,11 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
122122_CHANNEL_CACHE_TTL_MS = 8 * 24 * 60 * 60 * 1000
123123_REVERSE_INDEX_TTL_MS = 8 * 24 * 60 * 60 * 1000
124124
125- # Ignored message subtypes (system/meta events)
125+ # Ignored message subtypes (system/meta events).
126+ # `message_changed` is NOT in this set — it is routed to
127+ # `_handle_message_changed` so we can capture link unfurl metadata.
126128_IGNORED_SUBTYPES = frozenset (
127129 {
128- "message_changed" ,
129130 "message_deleted" ,
130131 "message_replied" ,
131132 "channel_join" ,
@@ -147,6 +148,15 @@ def _pin_task(task: asyncio.Task[Any]) -> None:
147148 }
148149)
149150
# Matches one trailing slash so a URL can be looked up with or without it
# when pairing message links with unfurl attachment URLs.
_TRAILING_SLASH_PATTERN = re.compile(r"/$")

# Link-unfurl wait window: Slack delivers unfurled attachments via a
# separate `message_changed` event ~100-2000ms after the original. We
# poll briefly so the message handler sees enriched links instead of
# bare URLs.
_UNFURL_WAIT_MS = 2000  # upper bound on how long the cache is polled
_UNFURL_POLL_MS = 150  # pause between two cache polls
_UNFURL_CACHE_TTL_MS = 60 * 60 * 1000  # 1 hour; TTL of a cached unfurl entry
159+
150160
151161# ---------------------------------------------------------------------------
152162# Helpers
@@ -1269,6 +1279,9 @@ def _handle_message_event(self, event: dict[str, Any], options: WebhookOptions |
12691279 return
12701280
12711281 subtype = event .get ("subtype" )
1282+ if subtype == "message_changed" :
1283+ self ._handle_message_changed (event , options )
1284+ return
12721285 if subtype and subtype in _IGNORED_SUBTYPES :
12731286 self ._logger .debug ("Ignoring message subtype" , {"subtype" : subtype })
12741287 return
@@ -1722,7 +1735,13 @@ async def _resolve_message_mentions(
17221735 # ==================================================================
17231736
17241737 def _extract_links (self , event : dict [str , Any ]) -> list [LinkPreview ]:
1725- """Extract link URLs from a Slack event."""
1738+ """Extract link URLs from a Slack event.
1739+
1740+ Also merges any inline unfurl metadata that Slack already attached to
1741+ this same event (legacy ``attachments`` array). Cross-event unfurl
1742+ metadata (delivered later via ``message_changed``) is merged
1743+ asynchronously via :meth:`_enrich_links`.
1744+ """
17261745 urls : set [str ] = set ()
17271746
17281747 for block in event .get ("blocks" , []):
@@ -1738,7 +1757,159 @@ def _extract_links(self, event: dict[str, Any]) -> list[LinkPreview]:
17381757 pipe_idx = raw .find ("|" )
17391758 urls .add (raw [:pipe_idx ] if pipe_idx >= 0 else raw )
17401759
1741- return [self ._create_link_preview (url ) for url in urls ]
1760+ # Build unfurl metadata index from inline (same-event) attachments.
1761+ unfurls : dict [str , dict [str , str | None ]] = {}
1762+ for att in event .get ("attachments" ) or []:
1763+ if not isinstance (att , dict ):
1764+ continue
1765+ att_url = att .get ("from_url" ) or att .get ("original_url" )
1766+ if att_url and (att .get ("title" ) or att .get ("text" )):
1767+ unfurls [att_url ] = {
1768+ "title" : att .get ("title" ),
1769+ "description" : att .get ("text" ),
1770+ "image_url" : att .get ("image_url" ) or att .get ("thumb_url" ),
1771+ "site_name" : att .get ("service_name" ),
1772+ }
1773+ urls .add (att_url )
1774+
1775+ previews : list [LinkPreview ] = []
1776+ for url in urls :
1777+ preview = self ._create_link_preview (url )
1778+ unfurl = unfurls .get (url ) or unfurls .get (_TRAILING_SLASH_PATTERN .sub ("" , url )) or unfurls .get (f"{ url } /" )
1779+ if unfurl :
1780+ preview = self ._merge_unfurl_into_preview (preview , unfurl )
1781+ previews .append (preview )
1782+ return previews
1783+
@staticmethod
def _merge_unfurl_into_preview(preview: LinkPreview, unfurl: dict[str, str | None]) -> LinkPreview:
    """Build a copy of ``preview`` whose empty fields are filled from ``unfurl``.

    A field that is already set on ``preview`` (anything other than
    ``None``) is kept unchanged; only ``None`` fields fall back to the
    matching ``unfurl`` entry. ``url`` and ``fetch_message`` are always
    taken from ``preview``.

    Args:
        preview: Link preview used as the base.
        unfurl: Mapping that may carry ``title``, ``description``,
            ``image_url`` and ``site_name`` entries.

    Returns:
        A newly constructed ``LinkPreview``; ``preview`` itself is not
        modified.
    """

    def _keep_or_fill(existing, key):
        # Identity check, not truthiness: an empty string already on the
        # preview still takes precedence over the unfurl value.
        if existing is None:
            return unfurl.get(key)
        return existing

    return LinkPreview(
        url=preview.url,
        title=_keep_or_fill(preview.title, "title"),
        description=_keep_or_fill(preview.description, "description"),
        image_url=_keep_or_fill(preview.image_url, "image_url"),
        site_name=_keep_or_fill(preview.site_name, "site_name"),
        fetch_message=preview.fetch_message,
    )
1800+
def _handle_message_changed(self, event: dict[str, Any], _options: WebhookOptions | None = None) -> None:
    """Cache link-unfurl metadata carried by a ``message_changed`` event.

    Slack delivers link unfurls asynchronously by editing the original
    message and dispatching ``message_changed``. Unfurl attachments found
    on the inner message are stored in the chat state under
    ``slack:unfurls:<ts>`` (``ts`` of the inner message) so that
    :meth:`_enrich_links` can pick them up for the original event.

    The write happens in a background task; this method never awaits it
    and never raises on a failed write (failures are logged).

    Args:
        event: Raw Slack event; ``message`` (dict) and ``channel`` are
            required, anything else is ignored.
        _options: Unused; accepted for signature parity with the other
            event handlers.
    """
    inner = event.get("message")
    channel = event.get("channel")
    if not (inner and channel and isinstance(inner, dict)):
        return

    attachments = inner.get("attachments") or []
    has_unfurls = any(
        isinstance(att, dict) and (att.get("from_url") or att.get("original_url")) for att in attachments
    )
    if not has_unfurls:
        self._logger.debug("Ignoring message_changed without unfurl data")
        return

    ts = inner.get("ts")
    # Bind once so the background task cannot observe ``self._chat`` being
    # reset to ``None`` between this check and the actual write.
    chat = self._chat
    if not (chat and ts):
        return

    self._logger.debug(
        "Processing message_changed for link unfurls",
        {"channel": channel, "ts": ts, "attachmentCount": len(attachments)},
    )

    # Only attachments that carry a URL *and* some displayable text are
    # worth caching.
    unfurls: dict[str, dict[str, str | None]] = {}
    for att in attachments:
        if not isinstance(att, dict):
            continue
        att_url = att.get("from_url") or att.get("original_url")
        if att_url and (att.get("title") or att.get("text")):
            unfurls[att_url] = {
                "title": att.get("title"),
                "description": att.get("text"),
                "image_url": att.get("image_url") or att.get("thumb_url"),
                "site_name": att.get("service_name"),
            }

    if not unfurls:
        return

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop (sync test context) — skip silently.
        self._logger.debug("No running loop; skipping unfurl cache write")
        return

    # NOTE(review): the cache key contains only ``ts``, not ``channel``.
    # If timestamps are only unique per channel, two channels could
    # overwrite each other's entry — confirm before relying on this.
    async def _store() -> None:
        try:
            await chat.get_state().set(
                f"slack:unfurls:{ts}",
                unfurls,
                _UNFURL_CACHE_TTL_MS,
            )
        except Exception as exc:
            self._logger.error("Failed to cache unfurl metadata", {"error": exc})

    def _report_failure(done: asyncio.Task[Any]) -> None:
        # ``Task.exception()`` raises CancelledError on a cancelled task,
        # so cancellation has to be ruled out first.
        if done.cancelled():
            return
        exc = done.exception()
        if exc is not None:
            self._logger.error("Unfurl cache task failed", {"error": exc})

    task = loop.create_task(_store())
    # The event loop keeps only weak references to tasks; hand the task to
    # the module's ``_pin_task`` helper (presumably retains a strong
    # reference until completion — confirm) so it cannot be collected
    # mid-flight.
    _pin_task(task)
    task.add_done_callback(_report_failure)
1867+
async def _enrich_links(self, links: list[LinkPreview], message_ts: str | None) -> list[LinkPreview]:
    """Enrich ``links`` with unfurl metadata from the ``message_changed`` cache.

    Polls the chat state for ``slack:unfurls:<message_ts>`` for at most
    ``_UNFURL_WAIT_MS`` to give Slack time to deliver the cross-event
    ``message_changed`` payload, then merges whatever was found into the
    links that have no title yet.

    Args:
        links: Previews extracted from the original message event.
        message_ts: ``ts`` of the original message; used as the cache key.

    Returns:
        The original ``links`` list (same object) when there is nothing to
        wait for, the cache stays empty, or the cache read fails;
        otherwise a new list in the same order with unfurl metadata merged
        into links that have no title.
    """
    chat = self._chat
    if not (chat and message_ts) or not links:
        return links

    # A link that already has a title, or that resolves through
    # ``fetch_message``, does not need an unfurl — skip the wait entirely
    # when that holds for every link.
    if all(link.title is not None or link.fetch_message is not None for link in links):
        return links

    cache_key = f"slack:unfurls:{message_ts}"
    poll_seconds = _UNFURL_POLL_MS / 1000.0
    deadline = time.monotonic() + (_UNFURL_WAIT_MS / 1000.0)
    state = chat.get_state()
    stored: dict[str, dict[str, str | None]] | None = None
    while True:
        try:
            stored = await state.get(cache_key)
        except Exception as exc:
            # Enrichment is best-effort: report the failure, then fall back
            # to the bare links instead of failing message parsing.
            self._logger.error("Failed to read unfurl metadata", {"error": exc})
            return links
        remaining = deadline - time.monotonic()
        if stored or remaining <= 0:
            break
        # Never sleep past the deadline, so the total wait stays within
        # ``_UNFURL_WAIT_MS``.
        await asyncio.sleep(min(poll_seconds, remaining))

    if not stored:
        return links

    enriched: list[LinkPreview] = []
    for link in links:
        if link.title is not None:
            enriched.append(link)
            continue
        # Try the URL as written, then without / with a trailing slash.
        unfurl = (
            stored.get(link.url)
            or stored.get(_TRAILING_SLASH_PATTERN.sub("", link.url))
            or stored.get(f"{link.url}/")
        )
        enriched.append(self._merge_unfurl_into_preview(link, unfurl) if unfurl else link)
    return enriched
17421913
17431914 def _create_link_preview (self , url : str ) -> LinkPreview :
17441915 """Create a LinkPreview for a URL.
@@ -1844,7 +2015,7 @@ async def _parse_slack_message(
18442015 self ._create_attachment (f , team_id = event .get ("team" ) or event .get ("team_id" ))
18452016 for f in event .get ("files" , [])
18462017 ],
1847- links = self ._extract_links (event ),
2018+ links = await self ._enrich_links ( self . _extract_links (event ), event . get ( "ts" ) ),
18482019 )
18492020
18502021 def _parse_slack_message_sync (self , event : dict [str , Any ], thread_id : str ) -> Message :
@@ -2348,7 +2519,15 @@ async def stream(
23482519
23492520 decoded = self .decode_thread_id (thread_id )
23502521 channel = decoded .channel
2351- thread_ts = decoded .thread_ts
2522+ # Normalize empty thread_ts to None to avoid Slack API "invalid_thread_ts" errors.
2523+ # Stream requires a real thread context — bail out when missing.
2524+ thread_ts = decoded .thread_ts or None
2525+ if not thread_ts :
2526+ self ._logger .debug ("Slack: stream skipped - no thread context" )
2527+ raise ValidationError (
2528+ "slack" ,
2529+ "Slack streaming requires a valid thread context (non-empty thread_ts)" ,
2530+ )
23522531 self ._logger .debug ("Slack: starting stream" , {"channel" : channel , "threadTs" : thread_ts })
23532532
23542533 token = self ._get_token ()
0 commit comments