Skip to content

Commit c8b6fc3

Browse files
authored
👌 IMPROVE: Allow setting {#id} on headings (#706)
For use with the `attrs_block` extension. This just moves around the logic for implicit heading anchors a bit (without changing anything user facing), to allow distinguishing between explicit and implicit heading ids.
1 parent 41a5061 commit c8b6fc3

7 files changed

Lines changed: 119 additions & 44 deletions

File tree

myst_parser/cli.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import sys
33

44
from markdown_it.renderer import RendererHTML
5+
from markdown_it.rules_core import StateCore
6+
from mdit_py_plugins.anchors import anchors_plugin
57

68
from myst_parser.config.main import MdParserConfig
79
from myst_parser.parsers.mdit import create_md_parser
@@ -28,9 +30,10 @@ def print_anchors(args=None):
2830
"-l", "--level", type=int, default=2, help="Maximum heading level."
2931
)
3032
args = arg_parser.parse_args(args)
31-
parser = create_md_parser(MdParserConfig(heading_anchors=args.level), RendererHTML)
33+
parser = create_md_parser(MdParserConfig(), RendererHTML)
34+
parser.use(anchors_plugin, max_level=args.level)
3235

33-
def _filter_plugin(state):
36+
def _filter_plugin(state: StateCore) -> None:
3437
state.tokens = [
3538
t
3639
for t in state.tokens

myst_parser/config/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,10 @@ def __repr__(self) -> str:
267267
},
268268
)
269269

270-
heading_anchors: Optional[int] = dc.field(
271-
default=None,
270+
heading_anchors: int = dc.field(
271+
default=0,
272272
metadata={
273-
"validator": optional(in_([1, 2, 3, 4, 5, 6, 7])),
273+
"validator": optional(in_([0, 1, 2, 3, 4, 5, 6, 7])),
274274
"help": "Heading level depth to assign HTML anchors",
275275
},
276276
)

myst_parser/mdit_to_docutils/base.py

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
TYPE_CHECKING,
1515
Any,
1616
Callable,
17+
Iterable,
1718
Iterator,
1819
MutableMapping,
1920
Sequence,
@@ -790,18 +791,24 @@ def blocks_mathjax_processing(self) -> bool:
790791
def render_heading(self, token: SyntaxTreeNode) -> None:
791792
"""Render a heading, e.g. `# Heading`."""
792793

793-
if self.md_env.get("match_titles", None) is False:
794-
# this can occur if a nested parse is performed by a directive
795-
# (such as an admonition) which contains a header.
796-
# this would break the document structure
797-
self.create_warning(
798-
"Disallowed nested header found, converting to rubric",
799-
MystWarnings.MD_HEADING_NESTED,
800-
line=token_line(token, default=0),
801-
append_to=self.current_node,
802-
)
794+
if (
795+
token.attrs.get("toc", None) == "false"
796+
or self.md_env.get("match_titles", None) is False
797+
):
798+
if self.md_env.get("match_titles", None) is False:
799+
# this can occur if a nested parse is performed by a directive
800+
# (such as an admonition) which contains a header.
801+
# this would break the document structure
802+
self.create_warning(
803+
"Disallowed nested header found, converting to rubric",
804+
MystWarnings.MD_HEADING_NESTED,
805+
line=token_line(token, default=0),
806+
append_to=self.current_node,
807+
)
808+
803809
rubric = nodes.rubric(token.content, "")
804810
self.add_line_and_source_path(rubric, token)
811+
self.copy_attributes(token, rubric, ("class", "id"))
805812
with self.current_node_context(rubric, append=True):
806813
self.render_children(token)
807814
return
@@ -811,6 +818,7 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
811818
# create the section node
812819
new_section = nodes.section()
813820
self.add_line_and_source_path(new_section, token)
821+
self.copy_attributes(token, new_section, ("class", "id"))
814822
# if a top level section,
815823
# then add classes to set default mathjax processing to false
816824
# we then turn it back on, on a per-node basis
@@ -830,28 +838,36 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
830838

831839
# create a target reference for the section, based on the heading text.
832840
# Note, this is an implicit target, meaning that it is not prioritised,
833-
# and is not stored by sphinx for ref resolution
841+
# during ref resolution, and is not stored in the document.
842+
# TODO this is purely to mimic docutils, but maybe we don't need it?
843+
# (since we have the slugify logic below)
834844
name = nodes.fully_normalize_name(title_node.astext())
835845
new_section["names"].append(name)
836846
self.document.note_implicit_target(new_section, new_section)
837847

838-
# add possible reference slug, this may be different to the standard name above,
839-
# and does not have to be normalised, so we treat it separately
840-
# TODO this id can now come from attributes, which we actually want to be explicit
841-
# I think rather than using the mdit anchors_plugin,
842-
# we should just compute them here (with the same logic)
843-
if "id" in token.attrs:
844-
slug = str(token.attrs["id"])
845-
new_section["slug"] = slug
846-
if slug in self._slug_to_section:
847-
other_node = self._slug_to_section[slug]
848+
if level <= self.md_config.heading_anchors:
849+
850+
# Create an implicit reference slug.
851+
# The problem with this reference slug,
852+
# is that it might not be in the "normalised" format required by docutils,
853+
# https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#normalized-reference-names
854+
# so we store it separately, and resolve it with logic separate from that of docutils
855+
# TODO maybe revisit this assumption, or improve the logic
856+
try:
857+
slug = compute_unique_slug(
858+
token,
859+
self._slug_to_section,
860+
self.md_config.heading_slug_func,
861+
)
862+
except Exception as error:
848863
self.create_warning(
849-
f"duplicate heading slug {slug!r}, other at line {other_node.line}",
850-
MystWarnings.ANCHOR_DUPE,
851-
line=new_section.line,
864+
str(error),
865+
MystWarnings.HEADING_SLUG,
866+
line=token_line(token, default=0),
867+
append_to=self.current_node,
852868
)
853869
else:
854-
# we store this for later processing on finalise
870+
new_section["slug"] = slug
855871
self._slug_to_section[slug] = new_section
856872

857873
# set the section as the current node for subsequent rendering
@@ -1842,3 +1858,42 @@ def clean_astext(node: nodes.Element) -> str:
18421858
for raw in list(findall(node)(nodes.raw)):
18431859
raw.parent.remove(raw)
18441860
return node.astext()
1861+
1862+
1863+
_SLUGIFY_CLEAN_REGEX = re.compile(r"[^\w\u4e00-\u9fff\- ]")
1864+
1865+
1866+
def default_slugify(title: str) -> str:
    """Convert a heading title to a slug, aiming to mimic GitHub Markdown.

    The title is stripped of surrounding whitespace, lower-cased,
    and has its spaces replaced by hyphens.
    Any character matched by ``_SLUGIFY_CLEAN_REGEX`` is then removed.

    See:

    - https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
    - https://gist.github.com/asabaylus/3071099

    :param title: the plain text of the heading.
    :return: the slug computed from the title.
    """
    hyphenated = title.strip().lower().replace(" ", "-")
    return _SLUGIFY_CLEAN_REGEX.sub("", hyphenated)
1875+
1876+
1877+
def compute_unique_slug(
    token_tree: SyntaxTreeNode,
    slugs: Iterable[str],
    slug_func: None | Callable[[str], str] = None,
) -> str:
    """Compute a slug for a heading token, that is not already in ``slugs``.

    This is intended to mirror the logic in `mdit_py_plugins.anchors_plugin`:
    if the base slug is already taken, then ``-1``, ``-2``, ... is appended
    to the *base* slug, until an unused value is found.

    :param token_tree: the heading node; the title text is read from
        the children of its second token.
    :param slugs: the slugs already in use. Only membership tests are performed
        (possibly several times), and the collection is not modified.
    :param slug_func: converts the title text to a slug,
        defaults to `default_slugify`.
    :return: the base slug if unused, otherwise the base slug with
        the first unused numeric suffix.
    """
    slug_func = default_slugify if slug_func is None else slug_func
    tokens = token_tree.to_tokens()
    # the second token is taken to be the heading's inline content
    inline_token = tokens[1]
    # only plain text and inline code contribute to the title,
    # other nested syntax (e.g. emphasis markers) is ignored
    title = "".join(
        child.content
        for child in (inline_token.children or [])
        if child.type in ("text", "code_inline")
    )
    base_slug = slug_func(title)
    slug = base_slug
    i = 1
    while slug in slugs:
        # always suffix the base slug, so that suffixes do not accumulate
        # (i.e. "title-2", not "title-1-2")
        slug = f"{base_slug}-{i}"
        i += 1
    return slug

myst_parser/parsers/mdit.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from markdown_it import MarkdownIt
99
from markdown_it.renderer import RendererProtocol
1010
from mdit_py_plugins.amsmath import amsmath_plugin
11-
from mdit_py_plugins.anchors import anchors_plugin
1211
from mdit_py_plugins.attrs import attrs_block_plugin, attrs_plugin
1312
from mdit_py_plugins.colon_fence import colon_fence_plugin
1413
from mdit_py_plugins.deflist import deflist_plugin
@@ -113,12 +112,6 @@ def create_md_parser(
113112
md.use(attrs_plugin, after=("image",))
114113
if "attrs_block" in config.enable_extensions:
115114
md.use(attrs_block_plugin)
116-
if config.heading_anchors is not None:
117-
md.use(
118-
anchors_plugin,
119-
max_level=config.heading_anchors,
120-
slug_func=config.heading_slug_func,
121-
)
122115
for name in config.disable_syntax:
123116
md.disable(name, True)
124117

myst_parser/warnings_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ class MystWarnings(Enum):
5151
"""A legacy domain found, which does not support `resolve_any_xref`."""
5252

5353
# extensions
54-
ANCHOR_DUPE = "anchor_dupe"
55-
"""Duplicate heading anchors generated in same document."""
54+
HEADING_SLUG = "heading_slug"
55+
"""An error occurred computing a heading slug."""
5656
STRIKETHROUGH = "strikethrough"
5757
"""Strikethrough warning, since only implemented in HTML."""
5858
HTML_PARSE = "html"

tests/test_renderers/fixtures/myst-config.txt

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -433,16 +433,38 @@ My paragraph
433433
<string>:3: (WARNING/2) Multiple matches for '*:*:*:*index': key:std:label:genindex, key:std:label:modindex, key:std:label:py-modindex, ... [myst.iref_ambiguous]
434434
.
435435

436-
[heading_slug_func] --myst-heading-anchors=1 --myst-heading-slug-func=myst_parser.config.main._test_slug_func
436+
[heading_slug_func] --myst-heading-anchors=2 --myst-heading-slug-func=myst_parser.config.main._test_slug_func
437437
.
438438
# title
439439

440+
## title
441+
442+
## title a b c
443+
444+
## title *nested syntax*
445+
446+
### other
447+
440448
[reversed](#eltit)
441449
.
442-
<document ids="title" names="title" slug="eltit" source="<string>" title="title">
450+
<document dupnames="title" ids="title" slug="eltit" source="<string>" title="title">
443451
<title>
444452
title
445-
<paragraph>
446-
<reference id_link="True" refid="title">
447-
reversed
453+
<section dupnames="title" ids="title-1" slug="eltit-1">
454+
<title>
455+
title
456+
<section ids="title-a-b-c" names="title\ a\ b\ c" slug="c b a eltit">
457+
<title>
458+
title a b c
459+
<section ids="title-nested-syntax" names="title\ nested\ syntax" slug="xatnys detsen eltit">
460+
<title>
461+
title
462+
<emphasis>
463+
nested syntax
464+
<section ids="other" names="other">
465+
<title>
466+
other
467+
<paragraph>
468+
<reference id_link="True" refid="title">
469+
reversed
448470
.

tests/test_renderers/test_myst_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ def test_cmdline(file_params: ParamTestData):
1919
"""The description is parsed as a docutils commandline"""
2020
if "url_schemes_list" in file_params.title and __version_info__ < (0, 18):
2121
pytest.skip("problematic node ids changed in docutils 0.18")
22+
if "heading_slug_func" in file_params.title and __version_info__ < (0, 18):
23+
pytest.skip("dupnames ids changed in docutils 0.18")
2224
pub = Publisher(parser=Parser())
2325
option_parser = pub.setup_option_parser()
2426
try:

0 commit comments

Comments
 (0)