Skip to content

Commit 43d1b58

Browse files
committed
Fix external links with anchors being rewritten as relative doc links
Protect <a href="https?://..."> links from linkify so identifiers in the URL fragment (e.g. #django.http.HttpResponseNotFound) or in the link text are not turned into relative documentation links. Mask such links with placeholders before linkify, then restore them after. Add test_external_link_with_anchor_preserved and testdata module to verify README/included markdown links to external docs with anchors remain intact.
1 parent 16e00b6 commit 43d1b58

2 files changed

Lines changed: 44 additions & 7 deletions

File tree

pdoc/render_helpers.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,15 @@ def module_candidates(identifier: str, current_module: str) -> Iterable[str]:
306306
yield identifier
307307

308308

309+
# Matches a complete <a ...>...</a> element whose href is an absolute http(s)
# URL, so that linkify can mask it before scanning for identifiers.
# Identifiers inside such an element -- in the URL fragment (e.g.
# #django.http.HttpResponseNotFound) or in the link text -- must not be
# rewritten as relative documentation links.
#
# The href attribute does not have to be the first attribute of the tag:
# `(?:[^>]*?\s)?` skips any attributes that precede it while still requiring
# whitespace directly before `href`, so `data-href="..."` is not mistaken for it.
_absolute_link_pattern = re.compile(
    r'<a\s+(?:[^>]*?\s)?href="https?://[^"]*"[^>]*>.*?</a>',
    # DOTALL: the link text may span several lines.
    # IGNORECASE: hand-written HTML may spell the tag as <A HREF="...">.
    re.DOTALL | re.IGNORECASE,
)
316+
317+
309318
@pass_context
310319
def linkify(
311320
context: Context, code: str, namespace: str = "", shorten: bool = True
@@ -319,6 +328,15 @@ def linkify(
319328
For example, replace "current_module.Foo" with "Foo". This is useful for annotations
320329
(which are verbose), but undesired for docstrings (where we want to preserve intent).
321330
"""
331+
# Protect existing external links so identifiers in href (e.g. #django.http.HttpResponseNotFound)
332+
# or in link text are not rewritten as relative documentation links.
333+
placeholders: list[str] = []
334+
335+
def save_absolute_link(m: re.Match[str]) -> str:
336+
placeholders.append(m.group(0))
337+
return f"\u200b\u200bPDOC_EXT_LINK_{len(placeholders) - 1}\u200b\u200b"
338+
339+
code = _absolute_link_pattern.sub(save_absolute_link, code)
322340

323341
def linkify_repl(m: re.Match):
324342
"""
@@ -401,9 +419,8 @@ def linkify_repl(m: re.Match):
401419
# No matches found.
402420
return text
403421

404-
return Markup(
405-
re.sub(
406-
r"""
422+
result = re.sub(
423+
r"""
407424
# Part 1: foo.bar or foo.bar() (without backticks)
408425
(?<![/=?#&\.]) # heuristic: not part of a URL
409426
# First part of the identifier (e.g. "foo") - this is optional for relative references.
@@ -429,11 +446,13 @@ def linkify_repl(m: re.Match):
429446
(?:\(\))?
430447
(?=</code>(?!</a>))
431448
""",
432-
linkify_repl,
433-
code,
434-
flags=re.VERBOSE,
435-
)
449+
linkify_repl,
450+
code,
451+
flags=re.VERBOSE,
436452
)
453+
for i, original in enumerate(placeholders):
454+
result = result.replace(f"\u200b\u200bPDOC_EXT_LINK_{i}\u200b\u200b", original)
455+
return Markup(result)
437456

438457

439458
@pass_context

test/test_render_helpers.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from pdoc.render_helpers import edit_url
8+
from pdoc.render_helpers import linkify
89
from pdoc.render_helpers import module_candidates
910
from pdoc.render_helpers import possible_sources
1011
from pdoc.render_helpers import qualname_candidates
@@ -161,3 +162,20 @@ def test_mixed_toc():
161162
)
162163
def test_markdown_autolink(md, html):
163164
assert to_html(md) == html
165+
166+
167+
def test_external_link_with_anchor_preserved():
    """
    External links with anchors must not be rewritten as relative documentation links.

    The markdown link is rendered to HTML first and the result is then passed
    through linkify, mirroring the order in which the two steps are applied
    to README/included markdown.
    """
    url = (
        "https://docs.djangoproject.com/en/6.0/ref/request-response/"
        "#django.http.HttpResponseNotFound"
    )
    html = to_html(f"See [HttpResponseNotFound]({url}).")

    # Minimal context so linkify runs (no modules to link to); tests that
    # external links are preserved.
    # NOTE(review): with an empty "all_modules" linkify may have nothing to
    # link to even without the protection -- consider adding a module whose
    # name matches the fragment to make this a stricter regression test.
    class FakeModule:
        modulename = "test"

        def get(self, name):
            return None

    ctx = {"module": FakeModule(), "all_modules": {}, "is_public": lambda doc: ""}
    result = str(linkify(ctx, html))

    # The external URL, including its fragment, survives unchanged.
    assert url in result
    # Every placeholder inserted while masking the link has been restored.
    assert "PDOC_EXT_LINK" not in result
    # linkify did not inject additional anchors around or inside the link.
    assert result.count("<a ") == html.count("<a ")

0 commit comments

Comments
 (0)