|
2 | 2 | from typing import Any |
3 | 3 | from urllib.parse import urljoin |
4 | 4 |
|
5 | | -import mistune.renderers |
| 5 | +import mistune.renderers.markdown |
6 | 6 | from bs4.element import PageElement, Tag |
7 | 7 | from markdownify import MarkdownConverter |
8 | | - |
9 | | -from monty import constants |
| 8 | +from mistune.core import BlockState |
| 9 | +from typing_extensions import override |
10 | 10 |
|
11 | 11 |
|
12 | 12 | __all__ = ( |
|
15 | 15 | "remove_codeblocks", |
16 | 16 | ) |
17 | 17 |
|
| 18 | +RenderToken = dict[str, Any] |
| 19 | + |
18 | 20 |
|
19 | 21 | CODE_BLOCK_RE = re.compile( |
20 | 22 | r"```(.+?)```|(?P<delim>`{1,2})([^\n]+?)(?P=delim)", |
@@ -100,155 +102,115 @@ def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str: |
100 | 102 | return "" |
101 | 103 |
|
102 | 104 |
|
103 | | -# TODO: this will be expanded over time as necessary |
104 | | -class DiscordRenderer(mistune.renderers.BaseRenderer): |
105 | | - """Custom renderer for markdown to discord compatiable markdown.""" |
| 105 | +class DiscordRenderer(mistune.renderers.markdown.MarkdownRenderer): |
| 106 | + """Custom renderer for markdown to discord compatible markdown.""" |
106 | 107 |
|
107 | 108 | def __init__(self, repo: str | None = None) -> None: |
108 | 109 | self._repo = (repo or "").rstrip("/") |
109 | 110 |
|
110 | | - def text(self, text: str) -> str: |
| 111 | + @override |
| 112 | + def text(self, token: RenderToken, state: BlockState) -> str: |
111 | 113 | """Replace GitHub links with their expanded versions.""" |
| 114 | + text: str = token["raw"] |
112 | 115 | if self._repo: |
113 | 116 | # TODO: expand this to all different varieties of automatic links |
| 117 | + # FIXME: this shouldn't expand shorthands inside []() links |
114 | 118 | # if a repository is provided we replace all snippets with the correct thing |
115 | 119 | def replacement(match: re.Match[str]) -> str: |
116 | | - return self.link(self._repo + "/issues/" + match[1], text=match[0]) |
| 120 | + full, num = match[0], match[1] |
| 121 | + url = f"{self._repo}/issues/{num}" |
| 122 | + # NOTE: until the above fixme is resolved, we can't use self.link here, |
| 123 | + # since it would recurse indefinitely. |
| 124 | + return f"[{full}]({url})" |
117 | 125 |
|
118 | 126 | text = GH_ISSUE_RE.sub(replacement, text) |
119 | 127 | return text |
120 | 128 |
|
121 | | - def link(self, link: str, text: str | None = None, title: str | None = None) -> str: |
122 | | - """Properly format a link.""" |
123 | | - if text or title: |
124 | | - if not text: |
125 | | - text = link |
126 | | - if title: |
127 | | - paran = f'({link} "{title}")' |
128 | | - else: |
129 | | - paran = f"({link})" |
130 | | - return f"[{text}]{paran}" |
131 | | - else: |
132 | | - return link |
133 | | - |
134 | | - def image(self, src: str, alt: str | None = None, title: str | None = None) -> str: |
135 | | - """Return a link to the provided image.""" |
136 | | - return "!" + self.link(src, text="image", title=alt) |
137 | | - |
138 | | - def emphasis(self, text: str) -> str: |
139 | | - """Return italiced text.""" |
140 | | - return f"*{text}*" |
141 | | - |
142 | | - def strong(self, text: str) -> str: |
143 | | - """Return bold text.""" |
144 | | - return f"**{text}**" |
145 | | - |
146 | | - def strikethrough(self, text: str) -> str: |
| 129 | + # Discord renders links regardless of whether it's `link` or `<link>` |
| 130 | + @override |
| 131 | + def link(self, token: RenderToken, state: BlockState) -> str: |
| 132 | + """Format links, removing unnecessary angle brackets.""" |
| 133 | + s = super().link(token, state) |
| 134 | + if s.startswith("<") and s.endswith(">"): |
| 135 | + s = s[1:-1] |
| 136 | + return s |
| 137 | + |
| 138 | + # provided by plugin, so not part of base MarkdownRenderer |
| 139 | + def strikethrough(self, token: RenderToken, state: BlockState) -> str: |
147 | 140 | """Return crossed-out text.""" |
| 141 | + text = self.render_children(token, state) |
148 | 142 | return f"~~{text}~~" |
149 | 143 |
|
150 | | - def heading(self, text: str, level: int) -> str: |
| 144 | + @override |
| 145 | + def heading(self, token: RenderToken, state: BlockState) -> str: |
151 | 146 | """Format the heading normally if it's large enough, or underline it.""" |
| 147 | + level: int = token["attrs"]["level"] |
| 148 | + text = self.render_children(token, state) |
152 | 149 | if level in (1, 2, 3): |
153 | 150 | return "#" * level + f" {text.strip()}\n" |
154 | 151 | else: |
| 152 | + # TODO: consider `-# __text__` for level 5 (smallest) headings? |
155 | 153 | return f"__{text}__\n" |
156 | 154 |
|
157 | | - def newline(self) -> str: |
158 | | - """No op.""" |
| 155 | + @override |
| 156 | + def inline_html(self, token: RenderToken, state: BlockState) -> str: |
| 157 | + """No op, Discord doesn't render HTML.""" |
159 | 158 | return "" |
160 | 159 |
|
161 | | - # this is for forced breaks like `text \ntext`; Discord |
162 | | - def linebreak(self) -> str: |
163 | | - """Return a new line.""" |
164 | | - return "\n" |
165 | | - |
166 | | - def inline_html(self, html: str) -> str: |
167 | | - """No op.""" |
| 160 | + @override |
| 161 | + def thematic_break(self, token: RenderToken, state: BlockState) -> str: |
| 162 | + """No op, Discord doesn't render breaks as horizontal rules.""" |
168 | 163 | return "" |
169 | 164 |
|
170 | | - def thematic_break(self) -> str: |
171 | | - """No op.""" |
172 | | - return "" |
173 | | - |
174 | | - def block_text(self, text: str) -> str: |
175 | | - """Return text in lists as-is.""" |
176 | | - return text + "\n" |
| 165 | + # Block code can be fenced by 3+ backticks or 3+ tildes, or be an indented block. |
| 166 | + # Discord only renders code blocks with exactly 3 backticks, so we have to force this format. |
| 167 | + @override |
| 168 | + def block_code(self, token: RenderToken, state: BlockState) -> str: |
| 169 | + """Put code in a codeblock with triple backticks.""" |
| 170 | + code: str = token["raw"] |
| 171 | + info: str | None = token.get("attrs", {}).get("info") |
177 | 172 |
|
178 | | - def block_code(self, code: str, info: str | None = None) -> str: |
179 | | - """Put the code in a codeblock.""" |
180 | 173 | md = "```" |
181 | | - if info is not None: |
182 | | - info = info.strip() |
183 | 174 | if info: |
184 | | - lang = info.split(None, 1)[0] |
185 | | - md += lang |
| 175 | + lang = info.strip().split(None, 1)[0] |
| 176 | + if lang: |
| 177 | + md += lang |
186 | 178 | md += "\n" |
187 | | - return md + code.replace("`" * 3, "`\u200b" * 3) + "\n```\n" |
188 | 179 |
|
189 | | - def block_quote(self, text: str) -> str: |
190 | | - """Quote the provided text.""" |
191 | | - if text: |
192 | | - return "> " + "> ".join(text.rstrip().splitlines(keepends=True)) + "\n\n" |
193 | | - return "" |
| 180 | + return md + code.replace("`" * 3, "`\u200b" * 3) + "\n```\n" |
194 | 181 |
|
195 | | - def block_html(self, html: str) -> str: |
196 | | - """No op.""" |
| 182 | + @override |
| 183 | + def block_html(self, token: RenderToken, state: BlockState) -> str: |
| 184 | + """No op, Discord doesn't render HTML.""" |
197 | 185 | return "" |
198 | 186 |
|
199 | | - def block_error(self, html: str) -> str: |
| 187 | + @override |
| 188 | + def block_error(self, token: RenderToken, state: BlockState) -> str: |
200 | 189 | """No op.""" |
201 | 190 | return "" |
202 | 191 |
|
203 | | - def codespan(self, text: str) -> str: |
| 192 | + # Codespans can be delimited with two backticks as well, which allows having |
| 193 | + # single backticks in the contents. |
| 194 | + # Additionally, the delimiters may include one space, e.g. "`` text ``", for text that starts/ends |
| 195 | + # with a backtick. Mistune strips these spaces, but we need them to avoid breaking formatting. |
| 196 | + # Discord renders these spaces (even though they shouldn't), but it's better than no formatting at all. |
| 197 | + # TODO: instead of spaces, we could use \u200b? |
| 198 | + @override |
| 199 | + def codespan(self, token: RenderToken, state: BlockState) -> str: |
204 | 200 | """Return the text in a codeblock.""" |
205 | | - char = "``" if "`" in text else "`" |
206 | | - return char + text + char |
207 | | - |
208 | | - def paragraph(self, text: str) -> str: |
209 | | - """Return a paragraph with a newline postceeding.""" |
210 | | - return f"{text}\n\n" |
211 | | - |
212 | | - def list(self, text: str, ordered: bool, level: int, start: Any = None) -> str: |
213 | | - """Return the unedited list.""" |
214 | | - # TODO: figure out how this should actually work |
215 | | - if level == 1: |
216 | | - return text.lstrip("\n") + "\n" |
217 | | - return text |
| 201 | + text: str = token["raw"] |
| 202 | + |
| 203 | + delim = "``" if "`" in text else "`" |
| 204 | + |
| 205 | + if text.startswith("`") or text.endswith("`"): |
| 206 | + text = f" {text} " |
| 207 | + |
| 208 | + return delim + text + delim |
218 | 209 |
|
219 | | - def list_item(self, text: str, level: int) -> str: |
220 | | - """Show the list, indented to its proper level.""" |
221 | | - lines = text.rstrip().splitlines() |
222 | | - |
223 | | - prefix = "- " |
224 | | - result: list[str] = [prefix + lines[0]] |
225 | | - |
226 | | - # just add one level of indentation; any outer lists will indent this again as needed |
227 | | - indent = " " * len(prefix) |
228 | | - in_codeblock = "```" in lines[0] |
229 | | - for line in lines[1:]: |
230 | | - if not line.strip(): |
231 | | - # whitespace-only lines can be rendered as empty |
232 | | - result.append("") |
233 | | - continue |
234 | | - |
235 | | - if in_codeblock: |
236 | | - # don't indent lines inside codeblocks |
237 | | - result.append(line) |
238 | | - else: |
239 | | - result.append(indent + line) |
240 | | - |
241 | | - # check this at the end, since the first codeblock line should generally be indented |
242 | | - if "```" in line: |
243 | | - in_codeblock = not in_codeblock |
244 | | - |
245 | | - return "\n".join(result) + "\n" |
246 | | - |
247 | | - def task_list_item(self, text: Any, level: int, checked: bool = False, **attrs) -> str: |
248 | | - """Convert task list options to emoji.""" |
249 | | - emoji = constants.Emojis.confirmation if checked else constants.Emojis.no_choice_light |
250 | | - return self.list_item(emoji + " " + text, level=level) |
251 | | - |
252 | | - def finalize(self, data: Any) -> str: |
253 | | - """Finalize the data.""" |
254 | | - return "".join(data) |
| 210 | + # FIXME: restore this, plugin rendering changed significantly |
| 211 | + # # def task_list_item(self, text: Any, level: int, checked: bool = False, **attrs) -> str: |
| 212 | + # def task_list_item(self, token: RenderToken, state: BlockState) -> str: |
| 213 | + # """Convert task list options to emoji.""" |
| 214 | + # checked: bool = token["attrs"]["checked"] |
| 215 | + # emoji = constants.Emojis.confirmation if checked else constants.Emojis.no_choice_light |
| 216 | + # return self.list_item(emoji + " " + text, level=level) |
0 commit comments