Skip to content

Commit 22c408d

Browse files
feat(teams): format primitive subpath (chat@4.31 8c71411)
Port the NET-NEW upstream packages/adapter-teams/src/format/index.ts helpers as a low-level, runtime-free string-primitive module at src/chat_sdk/adapters/teams/format.py — distinct from the existing AST-based format_converter.py. Helpers: escape_teams_text / unescape_teams_text, format_teams_mention, teams_mention_to_plain_text, teams_html_to_markdown, markdown_to_teams_html, convert_teams_emoji_placeholders, safe_link_href. - SDK-free: plain strings + stdlib only (re, urllib.parse); no microsoft_teams.* and no adapter/runtime imports. Mirrors the Slack primitive subpath layout and lazy-import style. - markdown_to_teams_html gates link hrefs through an exact {http, https, mailto} protocol allowlist via urllib.parse.urlparse (port of the upstream URL().protocol check) — rejects javascript:, data:, relative, and other unsafe hrefs (SSRF / injection guard). - escape_teams_text runs before any tag insertion so a user-supplied '<' cannot forge HTML (emit/parse symmetry). - Emoji placeholders reuse chat_sdk.emoji (no duplicated emoji map): each upstream colon placeholder maps to a normalized SDK emoji name and delegates the unicode lookup to convert_emoji_placeholders. Tests (tests/test_teams_format_primitives.py): the 6 upstream index.test.ts cases + a source-level import-boundary test (faithful port of format/boundary.test.ts) + adversarial forged-tag and disallowed-protocol cases. 14 tests, all green. Lane-scoped: only the new module + its test file; no shared files touched (lazy subpath registration in teams/__init__.py deferred to the packaging PR).
1 parent 5765b3d commit 22c408d

2 files changed

Lines changed: 336 additions & 0 deletions

File tree

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""Teams format primitives — a lightweight, runtime-free subpath.
2+
3+
Port of ``packages/adapter-teams/src/format/index.ts`` (vercel/chat@4.31,
4+
commit 8c71411), exposed upstream as ``@chat-adapter/teams/format``. Provides
5+
runtime-free primitives for escaping/unescaping Teams text, building and
6+
normalizing ``<at>`` mentions, and converting between Teams' restricted HTML
7+
subset and Markdown-ish text — without the full Teams adapter, the
8+
``microsoft_teams`` SDK, or the chat runtime.
9+
10+
This is intentionally distinct from :mod:`chat_sdk.adapters.teams.format_converter`,
11+
which is a higher-level AST-based converter. The helpers here are low-level
12+
string primitives that operate purely on text.
13+
14+
Importing this module never imports the ``microsoft_teams`` SDK, an HTTP
15+
client, or the high-level :mod:`chat_sdk.adapters.teams.adapter` module. Emoji
16+
placeholder conversion delegates to :mod:`chat_sdk.emoji` so the emoji map is
17+
never duplicated here.
18+
19+
Python-specific hardening (divergence from upstream, see
20+
``docs/UPSTREAM_SYNC.md``): :func:`markdown_to_teams_html` gates link hrefs
21+
through an exact ``{http, https, mailto}`` protocol allowlist using
22+
:func:`urllib.parse.urlparse` (port of the upstream ``URL().protocol`` check),
23+
rejecting ``javascript:``, ``data:``, relative, and other unsafe hrefs so they
24+
render as plain text rather than active links (SSRF / injection guard).
25+
"""
26+
27+
from __future__ import annotations
28+
29+
import re
30+
from urllib.parse import urlparse
31+
32+
from chat_sdk.emoji import convert_emoji_placeholders
33+
34+
__all__ = [
35+
"convert_teams_emoji_placeholders",
36+
"escape_teams_text",
37+
"format_teams_mention",
38+
"markdown_to_teams_html",
39+
"safe_link_href",
40+
"teams_html_to_markdown",
41+
"teams_mention_to_plain_text",
42+
"unescape_teams_text",
43+
]
44+
45+
# JS source patterns ported 1:1. The `gis` flags become DOTALL | IGNORECASE in
46+
# Python; JS `g` (replace-all) is the default for `re.sub`/`str.replace`.
47+
_HTML_ESCAPE_PATTERN = re.compile(r"[&<>\"]")
48+
_MARKDOWN_LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
49+
_TEAMS_MENTION_PATTERN = re.compile(r"<at\b[^>]*>(.*?)</at>", re.DOTALL | re.IGNORECASE)
50+
_TAG_PATTERN = re.compile(r"<[^>]+>")
51+
52+
# Key order is irrelevant: escaping is one regex pass (`_HTML_ESCAPE_PATTERN`
53+
# above), so the `&` of an emitted entity is never re-escaped. Matches upstream `HTML_ESCAPES`.
54+
_HTML_ESCAPES: dict[str, str] = {
55+
'"': "&quot;",
56+
"&": "&amp;",
57+
"<": "&lt;",
58+
">": "&gt;",
59+
}
60+
61+
# Upstream `EMOJI_PLACEHOLDERS` maps Slack-style colon placeholders to unicode.
62+
# Rather than re-declare the unicode (which would duplicate the emoji map), we
63+
# map each upstream placeholder to its normalized name in :mod:`chat_sdk.emoji`
64+
# and delegate the unicode lookup to that single source of truth.
65+
_PLACEHOLDER_TO_NORMALIZED: dict[str, str] = {
66+
":red_circle:": "red_circle",
67+
":warning:": "warning",
68+
":white_check_mark:": "check",
69+
":x:": "x",
70+
}
71+
72+
# Exact protocol allowlist for Markdown link hrefs (port of upstream
73+
# `SAFE_LINK_PROTOCOLS`). `urlparse` lowercases the scheme and yields it
74+
# without the trailing colon, so these are bare scheme names.
75+
_SAFE_LINK_PROTOCOLS: frozenset[str] = frozenset({"http", "https", "mailto"})
76+
77+
78+
def escape_teams_text(text: str) -> str:
    """Return *text* with ``&``, ``<``, ``>`` and ``"`` replaced by HTML entities.

    Call this before wrapping text in any HTML tag, so that a ``<`` supplied
    by a user ends up as ``&lt;`` and cannot act as markup.
    """

    def _entity_for(match: re.Match[str]) -> str:
        # Fall back to the matched character itself if it has no table entry.
        char = match.group(0)
        return _HTML_ESCAPES.get(char, char)

    # One pass over the input: entities that were just emitted are not rescanned.
    return _HTML_ESCAPE_PATTERN.sub(_entity_for, text)
85+
86+
87+
def unescape_teams_text(text: str) -> str:
    """Turn the four Teams HTML entities back into their literal characters.

    This is the inverse of :func:`escape_teams_text`. ``&amp;`` is handled
    last, so a double-escaped ``&amp;lt;`` comes back as ``&lt;`` rather than
    collapsing all the way to ``<``.
    """
    # Order is significant: `&amp;` must remain the final entry.
    entity_table = (
        ("&quot;", '"'),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&amp;", "&"),
    )
    restored = text
    for entity, literal in entity_table:
        restored = restored.replace(entity, literal)
    return restored
94+
95+
96+
def format_teams_mention(name: str) -> str:
    """Build a Teams ``<at>`` mention tag around the HTML-escaped display name."""
    safe_name = escape_teams_text(name)
    return "<at>" + safe_name + "</at>"
99+
100+
101+
def teams_mention_to_plain_text(text: str) -> str:
    """Rewrite every ``<at>...</at>`` mention in *text* as plain ``@name``.

    Tags nested inside the mention are removed, surrounding whitespace is
    trimmed, and HTML entities in the name are unescaped.
    """

    def _as_at_name(mention: re.Match[str]) -> str:
        inner = _strip_tags(mention.group(1)).strip()
        return "@" + unescape_teams_text(inner)

    return _TEAMS_MENTION_PATTERN.sub(_as_at_name, text)
109+
110+
111+
def teams_html_to_markdown(html: str) -> str:
    """Convert Teams' restricted HTML subset to Markdown-ish text.

    Steps, in order:

    1. ``<at>`` mentions become ``@name``.
    2. ``<strong>``/``<b>`` become ``**…**``, ``<em>``/``<i>`` become ``_…_``,
       ``<s>``/``<strike>`` become ``~~…~~`` and ``<code>`` becomes backticks.
    3. ``<a href="…">label</a>`` becomes ``[label](href)``.
    4. ``<br>`` becomes a newline; a ``</p><p>`` boundary becomes a blank line.
    5. Every remaining tag is dropped.
    6. Non-breaking spaces — the ``&nbsp;`` entity as well as the literal
       U+00A0 character — become ordinary spaces.
    7. The remaining HTML entities are unescaped and the result is stripped.

    Args:
        html: Teams message HTML.

    Returns:
        The Markdown-ish rendering of *html*.
    """
    tag_flags = re.DOTALL | re.IGNORECASE
    # (tag name, Markdown delimiter); applied in this exact order.
    wrappers = (
        ("strong", "**"),
        ("b", "**"),
        ("em", "_"),
        ("i", "_"),
        ("s", "~~"),
        ("strike", "~~"),
        ("code", "`"),
    )

    text = teams_mention_to_plain_text(html)
    for tag, mark in wrappers:
        # `\b` after the tag name keeps `<b` from matching `<br>` and `<s`
        # from matching `<strike>`/`<strong>`.
        text = re.sub(rf"<{tag}\b[^>]*>(.*?)</{tag}>", f"{mark}\\1{mark}", text, flags=tag_flags)
    text = re.sub(
        r"<a\b[^>]*href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>",
        r"[\2](\1)",
        text,
        flags=tag_flags,
    )
    text = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"</p>\s*<p[^>]*>", "\n\n", text, flags=re.IGNORECASE)
    text = _TAG_PATTERN.sub("", text)
    # Must run before unescaping: an escaped `&amp;nbsp;` (a literal "&nbsp;"
    # typed by a user) does not contain the substring `&nbsp;`, so it survives
    # this step and comes out of the unescape below as the literal text.
    text = text.replace("&nbsp;", " ").replace("\u00a0", " ")
    return unescape_teams_text(text).strip()
132+
133+
134+
def markdown_to_teams_html(markdown: str) -> str:
    """Convert Markdown-ish text to Teams' restricted HTML subset.

    The input is escaped *before* any tag insertion so user-supplied ``<``
    cannot forge HTML. Link hrefs are gated through :func:`safe_link_href`;
    unsafe hrefs render as plain label text.

    Safe hrefs are set aside while the inline markers (``**``, ``_``, ``~~``
    and backticks) are converted, and are only written into the ``<a>`` tag
    afterwards. A URL that contains underscores
    (``https://example.com/a_b_c``) is therefore emitted verbatim instead of
    having ``<em>`` tags injected into its ``href``. Link labels still receive
    inline formatting.

    NUL characters are removed from the input, because NUL delimits the
    internal href tokens and must not be forgeable by the caller.

    Args:
        markdown: Markdown-ish source text (treated as untrusted).

    Returns:
        Teams HTML, with newlines rendered as ``<br>``.
    """
    nul = "\x00"
    text = convert_teams_emoji_placeholders(escape_teams_text(markdown.replace(nul, "")))

    stashed_hrefs: list[str] = []

    def _stash_link(match: re.Match[str]) -> str:
        label, href = match.group(1), match.group(2)
        if not safe_link_href(href):
            return label
        stashed_hrefs.append(href)
        # Keep the `[label](…)` shape so the label is still formatted below,
        # but swap the href for an index token no inline pattern can match.
        return f"[{label}]({nul}{len(stashed_hrefs) - 1}{nul})"

    text = _MARKDOWN_LINK_PATTERN.sub(_stash_link, text)

    text = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", text)
    text = re.sub(r"_(.*?)_", r"<em>\1</em>", text)
    text = re.sub(r"~~(.*?)~~", r"<s>\1</s>", text)
    text = re.sub(r"`([^`]+)`", r"<code>\1</code>", text)

    def _emit_link(match: re.Match[str]) -> str:
        label, href = match.group(1), stashed_hrefs[int(match.group(2))]
        return f'<a href="{href}">{label}</a>'

    # Tokens can only originate from `_stash_link` (NUL was stripped from the
    # input), so every index found here is valid.
    text = re.sub(r"\[([^\]]+)\]\(\x00([0-9]+)\x00\)", _emit_link, text)
    return text.replace("\n", "<br>")
153+
154+
155+
def convert_teams_emoji_placeholders(text: str) -> str:
    """Replace the known colon-style emoji placeholders in *text*.

    For each entry of ``_PLACEHOLDER_TO_NORMALIZED`` the colon form (for
    example ``:white_check_mark:``) is replaced by whatever
    :func:`chat_sdk.emoji.convert_emoji_placeholders` returns for the matching
    ``{{emoji:<name>}}`` token on the ``"teams"`` platform. The emoji
    characters themselves are never spelled out in this module.
    """
    result = text
    for colon_form, sdk_name in _PLACEHOLDER_TO_NORMALIZED.items():
        glyph = convert_emoji_placeholders(f"{{{{emoji:{sdk_name}}}}}", "teams")
        result = result.replace(colon_form, glyph)
    return result
170+
171+
172+
def _strip_tags(text: str) -> str:
    """Remove every ``<...>`` tag from *text*, keeping the text between tags."""
    without_tags = _TAG_PATTERN.sub("", text)
    return without_tags
174+
175+
176+
def safe_link_href(href: str) -> bool:
    """Tell whether *href* uses one of the allowed link schemes.

    The scheme is extracted with :func:`urllib.parse.urlparse` and checked
    against ``_SAFE_LINK_PROTOCOLS`` (``http``, ``https``, ``mailto``). Hrefs
    with any other scheme (``javascript:``, ``data:``, …), hrefs without a
    scheme (relative paths, the empty string), and hrefs for which
    ``urlparse`` raises ``ValueError`` are all rejected.
    """
    try:
        scheme = urlparse(href).scheme
    except ValueError:
        return False
    return scheme in _SAFE_LINK_PROTOCOLS
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
"""Tests for the Teams format primitives subpath.
2+
3+
Port of ``packages/adapter-teams/src/format/index.test.ts`` and
4+
``format/boundary.test.ts`` (vercel/chat@4.31, commit 8c71411).
5+
6+
Distinct from ``test_teams_format.py``, which covers the higher-level
7+
AST-based ``TeamsFormatConverter``; this file covers the low-level
8+
``chat_sdk.adapters.teams.format`` string primitives and mirrors the Slack
9+
primitive test layout (``test_slack_format_primitives.py``).
10+
11+
Adversarial cases (per docs/SELF_REVIEW.md) extend the upstream suite with
12+
forged-tag escaping and the disallowed-protocol SSRF gate.
13+
"""
14+
15+
from __future__ import annotations
16+
17+
import importlib.util
18+
from pathlib import Path
19+
20+
from chat_sdk.adapters.teams.format import (
21+
convert_teams_emoji_placeholders,
22+
escape_teams_text,
23+
format_teams_mention,
24+
markdown_to_teams_html,
25+
safe_link_href,
26+
teams_html_to_markdown,
27+
teams_mention_to_plain_text,
28+
unescape_teams_text,
29+
)
30+
31+
32+
class TestTeamsFormatPrimitives:
    """The upstream ``Teams format primitives`` cases, ported one-to-one."""

    def test_escapes_and_unescapes_teams_text(self):
        raw = '<hello & "world">'
        escaped = escape_teams_text(raw)
        assert escaped == "&lt;hello &amp; &quot;world&quot;&gt;"
        assert unescape_teams_text(escaped) == raw

    def test_formats_and_normalizes_mentions(self):
        mention = format_teams_mention("Ada & Ben")
        assert mention == "<at>Ada &amp; Ben</at>"
        plain = teams_mention_to_plain_text("<at>Ada &amp; Ben</at> hi")
        assert plain == "@Ada & Ben hi"

    def test_converts_teams_html_to_markdown_ish_text(self):
        html = '<p>Hello <strong>world</strong><br><a href="https://example.com">link</a></p>'
        expected = "Hello **world**\n[link](https://example.com)"
        assert teams_html_to_markdown(html) == expected

    def test_converts_markdown_ish_text_to_teams_html(self):
        rendered = markdown_to_teams_html("**Ship** [now](https://example.com)")
        assert rendered == '<strong>Ship</strong> <a href="https://example.com">now</a>'
        mail = markdown_to_teams_html("[email](mailto:ada@example.com)")
        assert mail == '<a href="mailto:ada@example.com">email</a>'

    def test_renders_unsafe_markdown_links_as_plain_text(self):
        # Unsafe or scheme-less hrefs collapse to the bare label.
        assert markdown_to_teams_html("[bad](javascript:alert)") == "bad"
        assert markdown_to_teams_html("[relative](/internal)") == "relative"

    def test_converts_common_emoji_placeholders(self):
        converted = convert_teams_emoji_placeholders(":white_check_mark: done")
        assert converted == "✅ done"
63+
64+
65+
class TestTeamsFormatAdversarial:
    """Hostile-input cases (forged tags, unsafe schemes) added on top of upstream."""

    def test_escape_runs_before_tag_insertion_so_forged_tags_are_inert(self):
        # Escaping happens before any tag is inserted, so a raw `<script>`
        # must come out as inert entities rather than live markup.
        rendered = markdown_to_teams_html("<script>alert(1)</script>")
        assert rendered == "&lt;script&gt;alert(1)&lt;/script&gt;"

    def test_forged_anchor_in_label_does_not_emit_live_link(self):
        # The `<` in the label is escaped first; the only anchor emitted is
        # the real one whose href passed the protocol gate.
        rendered = markdown_to_teams_html("[<b>x</b>](https://ok.com)")
        assert rendered == '<a href="https://ok.com">&lt;b&gt;x&lt;/b&gt;</a>'

    def test_disallowed_protocols_are_rejected_by_the_ssrf_gate(self):
        # All hrefs are paren-free: the upstream MARKDOWN_LINK_PATTERN href
        # group is `[^)]+`, so an inner `)` is outside the link grammar.
        rejected_hrefs = (
            "javascript:alert",
            "data:text/html,evil",
            "vbscript:msgbox",
            "file:///etc/passwd",
            "/relative",
            "ftp://example.com/x",
        )
        for href in rejected_hrefs:
            assert markdown_to_teams_html(f"[label]({href})") == "label", href

    def test_allowed_protocols_pass_the_ssrf_gate(self):
        # The upper-case entry checks that scheme matching is case-insensitive
        # (urlparse lowercases the scheme).
        accepted_hrefs = (
            "http://example.com",
            "https://example.com",
            "mailto:ada@example.com",
            "HTTPS://EXAMPLE.COM",
        )
        for href in accepted_hrefs:
            assert safe_link_href(href) is True

    def test_disallowed_protocols_fail_the_ssrf_gate(self):
        rejected_hrefs = ("javascript:alert(1)", "data:text/html,x", "/internal", "")
        for href in rejected_hrefs:
            assert safe_link_href(href) is False

    def test_unescape_does_not_collapse_double_escaped_ampersand(self):
        # Reverse-order unescape: `&amp;lt;` becomes `&lt;`, never `<`.
        assert unescape_teams_text("&amp;lt;") == "&lt;"
108+
109+
110+
def _format_module_source() -> str:
    """Return the source text of ``chat_sdk.adapters.teams.format``.

    The file is located through the module's import spec and read as UTF-8.
    """
    module_spec = importlib.util.find_spec("chat_sdk.adapters.teams.format")
    assert module_spec is not None and module_spec.origin is not None
    source_path = Path(module_spec.origin)
    return source_path.read_text(encoding="utf8")
114+
115+
116+
class TestFormatImportBoundary:
    """Port of upstream ``format/boundary.test.ts``.

    The format subpath must stay runtime-free: its own source must not
    import the ``microsoft_teams`` SDK, an HTTP client (``httpx`` /
    ``aiohttp``), or the high-level adapter / chat runtime. The check works
    on the module's source text, not on ``sys.modules``.

    Imports are read from the parsed syntax tree rather than by scanning
    line prefixes, so relative imports (``from .adapter import X``,
    ``from . import adapter``) and ``from chat_sdk import chat`` are caught,
    and docstring lines that merely start with "from " or "import " are not.
    """

    # Package containing the module under test; anchors relative imports.
    _PACKAGE = "chat_sdk.adapters.teams"
    _FORBIDDEN = (
        "microsoft_teams",
        "httpx",
        "aiohttp",
        "chat_sdk.adapters.teams.adapter",
        "chat_sdk.adapters.teams.types",
        "chat_sdk.adapters.teams.cards",
        "chat_sdk.chat",
    )

    @staticmethod
    def _imported_module_names(source: str, package: str) -> list[str]:
        """List the absolute dotted names imported anywhere in *source*.

        ``import a.b`` yields ``a.b``. ``from a import b`` yields both ``a``
        and ``a.b``, because ``b`` may itself be a module. Relative imports
        are resolved against *package*.
        """
        import ast

        names: list[str] = []
        for node in ast.walk(ast.parse(source)):
            if isinstance(node, ast.Import):
                names.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                base = node.module or ""
                if node.level:
                    # level 1 is the current package; each extra level
                    # climbs one parent package.
                    parents = package.split(".")
                    anchor = parents[: max(len(parents) - (node.level - 1), 0)]
                    base = ".".join([*anchor, base] if base else anchor)
                names.append(base)
                names.extend(f"{base}.{alias.name}" for alias in node.names)
        return names

    def test_source_does_not_import_the_sdk_runtime_or_adapter(self):
        imported = self._imported_module_names(_format_module_source(), self._PACKAGE)
        present = [
            f"{token} :: {name}"
            for name in imported
            for token in self._FORBIDDEN
            # Match the forbidden module itself or anything beneath it.
            if name == token or name.startswith(token + ".")
        ]
        assert not present, f"format primitive imports forbidden modules: {present}"

    def test_emoji_reuse_does_not_duplicate_unicode(self):
        # The emoji map must come from chat_sdk.emoji, not be re-declared here.
        source = _format_module_source()
        assert "from chat_sdk.emoji import convert_emoji_placeholders" in source

0 commit comments

Comments
 (0)