Skip to content

Commit 01e12c2

Browse files
authored
Fix nested parsing of reference text (#152)
1 parent eed7be4 commit 01e12c2

24 files changed

Lines changed: 698 additions & 44 deletions

docs/using/syntax.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -757,14 +757,22 @@ Alternatively using the markdown syntax:
757757
[my text](header_target)
758758
```
759759
760-
is synonymous with using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any):
760+
is equivalent to using the [any inline role](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-any):
761761
762762
```md
763763
{any}`my text <header_target>`
764764
```
765765
766-
Using the same example, see this ref: [](syntax/targets), and here's a ref back to the top of
767-
this page: [my text](example_syntax).
766+
but can also accept "nested" syntax (like bold text) and will recognise document paths that include extensions (e.g. `using/syntax` or `using/syntax.md`).
767+
768+
Using the same example, see this ref: [](syntax/targets), here is a reference back to the top of
769+
this page: [my text with **nested** $\alpha$ syntax](example_syntax), and here is a reference to another page (`[](intro.md)`): [](intro.md).
770+
771+
```{note}
772+
If you wish to have the target's title inserted into your text, you can
773+
leave the "text" section of the markdown link empty. For example, this
774+
markdown: `[](syntax.md)` will result in: [](syntax.md).
775+
```
768776
769777
(syntax/footnotes)=
770778

myst_parser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
def setup(app):
    """Register the MyST parser and its reference resolver with Sphinx.

    Associates the ``.md`` suffix with the "markdown" source type, adds the
    parser, declares the ``myst_config`` config value and installs the
    post-transform, then returns the extension metadata dictionary.
    """
    # imported lazily, inside the function, as in the original layout
    from myst_parser.myst_refs import MystReferenceResolver
    from myst_parser.sphinx_parser import MystParser

    app.add_source_suffix(".md", "markdown")
    app.add_source_parser(MystParser)
    app.add_config_value("myst_config", {}, "env")
    app.add_post_transform(MystReferenceResolver)

    extension_metadata = {"version": __version__, "parallel_read_safe": True}
    return extension_metadata

myst_parser/myst_refs.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""A post-transform for overriding the behaviour of sphinx reference resolution.
2+
3+
This is applied to MyST type references only, such as ``[text](target)``,
4+
and allows for nested syntax
5+
"""
6+
import os
7+
from typing import Any, List, Tuple
8+
from typing import cast
9+
10+
from docutils import nodes
11+
from docutils.nodes import document, Element
12+
13+
from sphinx import addnodes
14+
from sphinx.addnodes import pending_xref
15+
from sphinx.errors import NoUri
16+
from sphinx.locale import __
17+
from sphinx.transforms.post_transforms import ReferencesResolver
18+
from sphinx.util import docname_join, logging
19+
from sphinx.util.nodes import clean_astext, make_refnode
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
class MystReferenceResolver(ReferencesResolver):
    """Resolve cross-references of type ``myst`` on doctrees.

    Overrides the default sphinx implementation, to allow for nested syntax
    in the reference text (the inner nodes are kept rather than being
    converted to raw text).
    """

    default_priority = 9  # higher priority than ReferencesResolver (10)

    def run(self, **kwargs: Any) -> None:
        """Replace every ``pending_xref`` whose ``reftype`` is ``"myst"``.

        For each such node, resolution is attempted with
        :meth:`resolve_myst_ref`; if that yields nothing, the
        ``missing-reference`` event is emitted; if that also yields nothing,
        ``warn_missing_reference`` is called. The node is finally replaced
        by the resolved node, or by a copy of its own content when nothing
        was found or ``NoUri`` was raised.
        """
        self.document: document
        # Snapshot the matches first: nodes are replaced inside the loop,
        # so we do not want to mutate the tree under a live traversal.
        for node in list(self.document.traverse(addnodes.pending_xref)):
            if node["reftype"] != "myst":
                continue

            # an independent copy of the reference content, used as fallback
            contnode = cast(nodes.TextElement, node[0].deepcopy())
            newnode = None

            typ = node["reftype"]
            target = node["reftarget"]
            refdoc = node.get("refdoc", self.env.docname)
            domain = None

            try:
                newnode = self.resolve_myst_ref(refdoc, node, contnode)
                # no new node found? try the missing-reference event
                if newnode is None:
                    newnode = self.app.emit_firstresult(
                        "missing-reference", self.env, node, contnode
                    )
                    # still not found? warn if node wishes to be warned about
                    # or we are in nit-picky mode
                    if newnode is None:
                        self.warn_missing_reference(refdoc, typ, target, node, domain)
            except NoUri:
                newnode = contnode

            node.replace_self(newnode or contnode)

    def _resolve_ref_nested(self, node: pending_xref, fromdocname: str) -> Element:
        """Resolve ``node`` against the labels of the ``std`` domain.

        This is the same as ``sphinx.domains.std._resolve_ref_xref``,
        but allows for nested syntax,
        rather than converting the inner nodes to raw text.

        :param node: the pending cross-reference to resolve
        :param fromdocname: name of the document containing the reference
        :return: a reference node, or ``None`` when the (lower-cased) target
            is not a known label
        """
        stddomain = self.env.get_domain("std")
        target = node["reftarget"].lower()

        if node["refexplicit"]:
            # reference to anonymous label; the reference uses
            # the supplied link caption
            docname, labelid = stddomain.anonlabels.get(target, ("", ""))
            sectname = node.astext()
            innernode = nodes.inline(sectname, "")
            # keep the rendered child nodes instead of their plain text
            innernode.extend(node[0].children)
        else:
            # reference to named label; the final node will
            # contain the section name after the label
            docname, labelid, sectname = stddomain.labels.get(target, ("", "", ""))
            innernode = nodes.inline(sectname, sectname)

        if not docname:
            return None

        return make_refnode(self.app.builder, fromdocname, docname, labelid, innernode)

    def _resolve_doc_nested(self, node: pending_xref, fromdocname: str) -> Element:
        """Resolve ``node`` as a reference to a whole document.

        This is the same as ``sphinx.domains.std._resolve_doc_xref``,
        but allows for nested syntax,
        rather than converting the inner nodes to raw text.

        It also allows for extensions on document names.

        :param node: the pending cross-reference to resolve
        :param fromdocname: name of the document containing the reference
        :return: a reference node, or ``None`` when no matching document
            is found in ``env.all_docs``
        """
        # directly reference to document by source name; can be absolute or relative
        refdoc = node.get("refdoc", fromdocname)
        docname = docname_join(refdoc, node["reftarget"])

        if docname not in self.env.all_docs:
            # try stripping known extensions from doc name
            stem, extension = os.path.splitext(docname)
            if extension in self.env.config.source_suffix:
                docname = stem
            if docname not in self.env.all_docs:
                return None

        if node["refexplicit"]:
            # reference with explicit title
            caption = node.astext()
            innernode = nodes.inline(caption, "", classes=["doc"])
            # keep the rendered child nodes instead of their plain text
            innernode.extend(node[0].children)
        else:
            # TODO do we want nested syntax for titles?
            caption = clean_astext(self.env.titles[docname])
            innernode = nodes.inline(caption, caption, classes=["doc"])

        return make_refnode(self.app.builder, fromdocname, docname, None, innernode)

    def resolve_myst_ref(
        self, refdoc: str, node: pending_xref, contnode: Element
    ) -> Element:
        """Resolve reference generated by the "myst" role.

        Candidates are collected, in order, from: ``std`` labels, document
        names, other ``std`` domain objects, and finally every other domain.
        The first candidate is used; a warning is logged when there is more
        than one.

        :param refdoc: name of the document containing the reference
        :param node: the pending cross-reference to resolve
        :param contnode: content node used as the child of references
            created for ``std`` objects and passed to the other domains
        :return: the resolved node, or ``None`` when there is no candidate
        """

        stddomain = self.env.get_domain("std")
        target = node["reftarget"]
        results = []  # type: List[Tuple[str, Element]]

        # resolve standard references first
        res = self._resolve_ref_nested(node, refdoc)
        if res:
            results.append(("std:ref", res))

        # next resolve doc names
        res = self._resolve_doc_nested(node, refdoc)
        if res:
            results.append(("std:doc", res))

        # next resolve for any other standard reference object
        for objtype in stddomain.object_types:
            key = (objtype, target)
            if objtype == "term":
                key = (objtype, target.lower())
            if key in stddomain.objects:
                docname, labelid = stddomain.objects[key]
                domain_role = "std:" + stddomain.role_for_objtype(objtype)
                ref_node = make_refnode(
                    self.app.builder, refdoc, docname, labelid, contnode
                )
                results.append((domain_role, ref_node))

        # finally resolve for any other type of reference
        # TODO do we want to restrict this?
        for domain in self.env.domains.values():
            if domain.name == "std":
                continue  # we did this one already
            try:
                results.extend(
                    domain.resolve_any_xref(
                        self.env, refdoc, self.app.builder, target, node, contnode
                    )
                )
            except NotImplementedError:
                # the domain doesn't yet support the new interface
                # we have to manually collect possible references (SLOW)
                for role in domain.roles:
                    res = domain.resolve_xref(
                        self.env, refdoc, self.app.builder, role, target, node, contnode
                    )
                    if res and isinstance(res[0], nodes.Element):
                        results.append((f"{domain.name}:{role}", res))

        # now, see how many matches we got...
        if not results:
            return None
        if len(results) > 1:

            def stringify(name, refnode):
                reftitle = refnode.get("reftitle", refnode.astext())
                return f":{name}:`{reftitle}`"

            candidates = " or ".join(
                stringify(name, refnode) for name, refnode in results
            )
            # The msgid handed to ``__`` must be a constant string: an
            # already-interpolated f-string would make the lookup key differ
            # for every target. The values are passed as logging arguments.
            logger.warning(
                __(
                    "more than one target found for 'myst' cross-reference %s: "
                    "could be %s"
                ),
                target,
                candidates,
                location=node,
            )

        res_role, newnode = results[0]
        # Override "myst" class with the actual role type to get the styling
        # approximately correct.
        res_domain = res_role.split(":")[0]
        if len(newnode) > 0 and isinstance(newnode[0], nodes.Element):
            newnode[0]["classes"] = newnode[0].get("classes", []) + [
                res_domain,
                res_role.replace(":", "-"),
            ]

        return newnode

myst_parser/sphinx_parser.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
from os import path
2+
import time
3+
14
from docutils import frontend, nodes
5+
from docutils.core import publish_doctree
6+
from sphinx.application import Sphinx
7+
from sphinx.io import SphinxStandaloneReader
28
from sphinx.parsers import Parser
39
from sphinx.util import logging
10+
from sphinx.util.docutils import sphinx_domains
411

512
from myst_parser.main import to_docutils
613

@@ -200,3 +207,22 @@ def parse(self, inputstring: str, document: nodes.document):
200207
disable_syntax=self.config["disable_syntax"] or [],
201208
math_delimiters=self.config["math_delimiters"],
202209
)
210+
211+
212+
def parse(app: Sphinx, text: str, docname: str = "index") -> nodes.document:
    """Parse a string as MystMarkdown with Sphinx application.

    :param app: the Sphinx application whose environment is used
    :param text: the source text to parse
    :param docname: document name to register the text under; the source
        path handed to docutils is ``<app.srcdir>/<docname>.md``
    :return: the doctree produced by ``publish_doctree``
    """
    env = app.env
    # register the document with the environment before parsing
    env.temp_data["docname"] = docname
    env.all_docs[docname] = time.time()

    doc_reader = SphinxStandaloneReader()
    doc_reader.setup(app)

    myst_parser = MystParser()
    myst_parser.set_application(app)

    with sphinx_domains(env):
        doctree = publish_doctree(
            text,
            path.join(app.srcdir, docname + ".md"),
            reader=doc_reader,
            parser=myst_parser,
            parser_name="markdown",
            settings_overrides={"env": env, "gettext_compact": True},
        )
        return doctree

myst_parser/sphinx_renderer.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def handle_cross_reference(self, token, destination):
3131
"""Create nodes for references that are not immediately resolvable."""
3232
wrap_node = addnodes.pending_xref(
3333
reftarget=unquote(destination),
34-
reftype="any",
34+
reftype="myst",
3535
refdomain=None, # Added to enable cross-linking
3636
refexplicit=len(token.children) > 0,
3737
refwarn=True,
@@ -41,9 +41,10 @@ def handle_cross_reference(self, token, destination):
4141
if title:
4242
wrap_node["title"] = title
4343
self.current_node.append(wrap_node)
44-
text_node = nodes.literal("", "", classes=["xref", "any"])
45-
wrap_node.append(text_node)
46-
with self.current_node_context(text_node):
44+
45+
inner_node = nodes.inline("", "", classes=["xref", "myst"])
46+
wrap_node.append(inner_node)
47+
with self.current_node_context(inner_node):
4748
self.render_children(token)
4849

4950
def render_math_block_eqno(self, token):
@@ -76,7 +77,7 @@ def add_math_target(self, node):
7677
return target
7778

7879

79-
def minimal_sphinx_app(configuration=None, sourcedir=None):
80+
def minimal_sphinx_app(configuration=None, sourcedir=None, with_builder=False):
8081
"""Create a minimal Sphinx environment; loading sphinx roles, directives, etc.
8182
"""
8283

@@ -106,7 +107,7 @@ def __init__(self, confoverrides=None, srcdir=None):
106107
self.env.temp_data["docname"] = "mock_docname"
107108
self.builder = None
108109

109-
if not confoverrides:
110+
if not with_builder:
110111
return
111112

112113
# this code is only required for more complex parsing with extensions
@@ -128,7 +129,7 @@ def __init__(self, confoverrides=None, srcdir=None):
128129

129130

130131
@contextmanager
131-
def mock_sphinx_env(conf=None, srcdir=None, document=None):
132+
def mock_sphinx_env(conf=None, srcdir=None, document=None, with_builder=False):
132133
"""Set up an environment, to parse sphinx roles/directives,
133134
outside of a `sphinx-build`.
134135
@@ -144,7 +145,9 @@ def mock_sphinx_env(conf=None, srcdir=None, document=None):
144145
_roles = copy.copy(roles._roles)
145146
# Monkey-patch directive and role dispatch,
146147
# so that sphinx domain-specific markup takes precedence.
147-
app = minimal_sphinx_app(configuration=conf, sourcedir=srcdir)
148+
app = minimal_sphinx_app(
149+
configuration=conf, sourcedir=srcdir, with_builder=with_builder
150+
)
148151
_sphinx_domains = sphinx_domains(app.env)
149152
_sphinx_domains.enable()
150153
if document is not None:

tests/test_renderers/fixtures/syntax_elements.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ bar$ b
285285
.
286286
<document source="notset">
287287
<paragraph>
288-
a
288+
a
289289
<math>
290290
foo
291291
bar
@@ -390,12 +390,12 @@ Title
390390
<title>
391391
Title
392392
<paragraph>
393-
<pending_xref refdomain="True" refexplicit="True" reftarget="target" reftype="any" refwarn="True">
394-
<literal classes="xref any">
393+
<pending_xref refdomain="True" refexplicit="True" reftarget="target" reftype="myst" refwarn="True">
394+
<inline classes="xref myst">
395395
alt1
396396
<paragraph>
397-
<pending_xref refdomain="True" refexplicit="False" reftarget="target2" reftype="any" refwarn="True">
398-
<literal classes="xref any">
397+
<pending_xref refdomain="True" refexplicit="False" reftarget="target2" reftype="myst" refwarn="True">
398+
<inline classes="xref myst">
399399
<paragraph>
400400
<reference refuri="https://www.google.com">
401401
alt2
@@ -489,8 +489,8 @@ Link Definition in directive:
489489
<document source="notset">
490490
<note>
491491
<paragraph>
492-
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="any" refwarn="True">
493-
<literal classes="xref any">
492+
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="myst" refwarn="True">
493+
<inline classes="xref myst">
494494
a
495495
.
496496
@@ -514,8 +514,8 @@ Link Definition in nested directives:
514514
<note>
515515
<note>
516516
<paragraph>
517-
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="any" refwarn="True">
518-
<literal classes="xref any">
517+
<pending_xref refdomain="True" refexplicit="True" reftarget="link" reftype="myst" refwarn="True">
518+
<inline classes="xref myst">
519519
ref1
520520
521521
[ref2]
@@ -699,6 +699,6 @@ a = 1
699699
<literal_block language="::python" xml:space="preserve">
700700
a = 1
701701
<paragraph>
702-
<pending_xref refdomain="True" refexplicit="False" reftarget="target" reftype="any" refwarn="True">
703-
<literal classes="xref any">
702+
<pending_xref refdomain="True" refexplicit="False" reftarget="target" reftype="myst" refwarn="True">
703+
<inline classes="xref myst">
704704
.

0 commit comments

Comments
 (0)