Skip to content

Commit f3e6c22

Browse files
build: update markdownify (#719)
1 parent b7542d9 commit f3e6c22

5 files changed

Lines changed: 27 additions & 71 deletions

File tree

.basedpyright/baseline.json

Lines changed: 0 additions & 42 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

monty/utils/html_parsing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,9 @@ def _get_truncated_description(
177177
if rendered_length + element_length >= max_length:
178178
break
179179
if is_tag:
180-
element_markdown = markdown_converter.process_tag(element, convert_as_inline=False)
180+
element_markdown = markdown_converter.process_tag(element)
181181
else:
182-
element_markdown = markdown_converter.process_text(element.text)
182+
element_markdown = markdown_converter.process_text(element.text, set())
183183

184184
rendered_length += element_length
185185
tag_end_index += len(element_markdown)

monty/utils/markdown.py

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
"DocMarkdownConverter",
1515
"remove_codeblocks",
1616
)
17-
# taken from version 0.6.1 of markdownify
18-
WHITESPACE_RE = re.compile(r"[\r\n\s\t ]+")
1917

2018

2119
CODE_BLOCK_RE = re.compile(
@@ -35,20 +33,18 @@ def remove_codeblocks(content: str) -> str:
3533
class DocMarkdownConverter(MarkdownConverter):
3634
"""Subclass markdownify's MarkdownConverter to provide custom conversion methods."""
3735

38-
def __init__(self, *, page_url: str, **options) -> None:
39-
super().__init__(**options)
40-
self.page_url = page_url
36+
def __init__(self, *, page_url: str, **options):
37+
# Reflow text to avoid unwanted line breaks.
38+
default_options = {"wrap": True, "wrap_width": None}
4139

42-
# overwritten to use our regex from version 0.6.1
43-
def process_text(self, text: str | None) -> Any:
44-
"""Process the text, using our custom regex."""
45-
return self.escape(WHITESPACE_RE.sub(" ", text or ""))
40+
super().__init__(**default_options | options)
41+
self.page_url = page_url
4642

47-
def convert_img(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
43+
def convert_img(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
4844
"""Remove images from the parsed contents, we don't want them."""
4945
return ""
5046

51-
def convert_li(self, el: Tag, text: str, convert_as_inline: bool) -> str:
47+
def convert_li(self, el: Tag, text: str, parent_tags: set[str]) -> str:
5248
"""Fix markdownify's erroneous indexing in ol tags."""
5349
parent = el.parent
5450
if parent is not None and parent.name == "ol":
@@ -65,40 +61,42 @@ def convert_li(self, el: Tag, text: str, convert_as_inline: bool) -> str:
6561
bullet = bullets[depth % len(bullets)]
6662
return f"{bullet} {text}\n"
6763

68-
def convert_hn(self, _n: int, el: PageElement, text: str, convert_as_inline: bool) -> str:
64+
def _convert_hn(self, n: int, el: PageElement, text: str, parent_tags: set[str]) -> str:
6965
"""Convert h tags to bold text with ** instead of adding #."""
70-
if convert_as_inline:
66+
if "_inline" in parent_tags:
7167
return text
7268
return f"**{text}**\n\n"
7369

74-
def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
70+
def convert_code(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
7571
"""Undo `markdownify`'s underscore escaping."""
7672
return f"`{text}`".replace("\\", "")
7773

78-
def convert_pre(self, el: Tag, text: str, convert_as_inline: bool) -> str:
74+
def convert_pre(self, el: Tag, text: str, parent_tags: set[str]) -> str: # pyright: ignore[reportIncompatibleMethodOverride] # bug in pyright
7975
"""Wrap any codeblocks in `py` for syntax highlighting."""
8076
code = "".join(el.strings)
8177
return f"```py\n{code}```"
8278

83-
def convert_a(self, el: Tag, text: str, convert_as_inline: bool) -> str:
79+
def convert_a(self, el: Tag, text: str, parent_tags: set[str]) -> str:
8480
"""Resolve relative URLs to `self.page_url`."""
8581
href = el["href"]
8682
assert isinstance(href, str)
8783
el["href"] = urljoin(self.page_url, href)
88-
return super().convert_a(el, text, convert_as_inline)
84+
# Discord doesn't handle titles properly, showing links with them as raw text.
85+
el["title"] = ""
86+
return super().convert_a(el, text, parent_tags)
8987

90-
def convert_p(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
88+
def convert_p(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
9189
"""Include only one newline instead of two when the parent is a li tag."""
92-
if convert_as_inline:
90+
if "_inline" in parent_tags:
9391
return text
9492

9593
parent = el.parent
9694
if parent is not None and parent.name == "li":
9795
return f"{text}\n"
98-
return super().convert_p(el, text, convert_as_inline)
96+
return super().convert_p(el, text, parent_tags)
9997

100-
def convert_hr(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
101-
"""Convert hr tags to nothing. This is because later versions added this method."""
98+
def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str: # pyright: ignore[reportIncompatibleMethodOverride] # bug in pyright
99+
"""Ignore `hr` tag."""
102100
return ""
103101

104102

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies = [
2727
"httpx>=0.28.1",
2828
"httpx-aiohttp>=0.1.9",
2929
"lxml<6.0.0,>=5.4.0",
30-
"markdownify==0.11.6",
30+
"markdownify~=1.1.0",
3131
"mistune<3.0.0,>=2.0.4",
3232
"msgpack<2.0.0,>=1.1.0",
3333
"orjson<4.0.0,>=3.10.18",

uv.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)