Skip to content

Commit 729a810

Browse files
authored
🐛 FIX: reorder footnotes (#23)
1 parent d3cd3a9 commit 729a810

14 files changed

Lines changed: 1110 additions & 152 deletions

mdformat_footnote/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""An mdformat plugin for parsing/validating footnotes"""
22

33
__version__ = "0.1.2"
4+
__plugin_name__ = "footnote"
45

5-
from .plugin import RENDERERS, update_mdit # noqa: F401
6+
from .plugin import RENDERERS, add_cli_argument_group, update_mdit # noqa: F401

mdformat_footnote/_helpers.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Helper functions for plugin configuration."""
2+
3+
from __future__ import annotations
4+
5+
from collections.abc import Mapping
6+
from typing import Any
7+
8+
from . import __plugin_name__
9+
10+
ContextOptions = Mapping[str, Any]
11+
12+
13+
def get_conf(options: ContextOptions, key: str) -> bool | str | int | None:
    """Read a plugin setting from the mdformat configuration context.

    A value stored directly under ``options["mdformat"][key]`` takes
    precedence. Otherwise the value is looked up under
    ``options["mdformat"]["plugin"][__plugin_name__][key]``.

    Args:
        options: Mapping of parser options that may hold an ``"mdformat"``
            section.
        key: Name of the setting to read.

    Returns:
        The configured value, or ``None`` when the setting is absent from
        both locations (including when there is no ``"mdformat"`` section).
    """
    # Tolerate a missing or empty "mdformat" section so a lookup on a parser
    # that carries no mdformat options yields None instead of a KeyError.
    mdformat_options = options.get("mdformat") or {}
    if (api := mdformat_options.get(key)) is not None:
        return api
    plugin_sections = mdformat_options.get("plugin") or {}
    plugin_options = plugin_sections.get(__plugin_name__) or {}
    return plugin_options.get(key)

mdformat_footnote/_reorder.py

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
"""Footnote ID and subId normalization logic."""
2+
3+
from __future__ import annotations
4+
5+
from dataclasses import dataclass, field
6+
import re
7+
8+
from markdown_it.rules_core import StateCore
9+
10+
_FOOTNOTE_REF_PATTERN = re.compile(r"\[\^([^\]]+)\]")
11+
12+
13+
@dataclass
class _FootnoteCategories:
    """Footnotes grouped by where they are referenced, for reordering."""

    # Triples of (old id, refs key, label).
    body_referenced: list[tuple[int, str, str]]
    # Labels (no key prefix).
    nested_only: set[str]
    # Labels (no key prefix).
    fence_only: list[str]
    # Refs keys (prefix character followed by the label).
    true_orphans: list[str]

    @property
    def body_labels(self) -> set[str]:
        """Return the label of every entry in ``body_referenced``."""
        labels: set[str] = set()
        for _old_id, _label_key, label in self.body_referenced:
            labels.add(label)
        return labels
25+
26+
27+
@dataclass
class _ReorderState:
    """Mutable bookkeeping used while building the reordered footnote list.

    Attributes are documented inline. ``add_footnote`` is the only method
    that mutates the state.
    """

    # Original footnote entries, keyed by their old id.
    old_list: dict
    # Mapping of refs key -> footnote id; rewritten in place with new ids.
    refs: dict
    # Reordered footnote entries, keyed by their new id.
    new_list: dict = field(default_factory=dict)
    # Translation table from old ids to new ids.
    old_to_new_id: dict[int, int] = field(default_factory=dict)
    # Labels that have already been placed in ``new_list``.
    processed: set[str] = field(default_factory=set)
    # Next id to hand out.
    new_id: int = 0

    def _find_def_by_label(self, label: str) -> dict:
        """Return a copy of the old entry for *label*, or a fresh empty entry."""
        for fn_data in self.old_list.values():
            if fn_data.get("label") == label:
                # Copy so later count updates never touch the old list.
                return fn_data.copy()
        return {"label": label, "count": 0}

    def _find_old_id_by_label(self, label: str) -> int | None:
        """Return the old id whose entry carries *label*, or ``None``."""
        for old_id, fn_data in self.old_list.items():
            if fn_data.get("label") == label:
                return old_id
        return None

    def add_footnote(
        self, label: str, label_key: str, old_id: int | None = None
    ) -> None:
        """Append a footnote to the new list and update all mappings.

        Args:
            label: Footnote label (without the refs-key prefix).
            label_key: Key under which the footnote is stored in ``refs``.
            old_id: The footnote's previous id when known; when ``None`` the
                id is looked up in ``old_list`` by label.

        A label that was already added is ignored, so each footnote receives
        exactly one new id.
        """
        if label in self.processed:
            return

        self.new_list[self.new_id] = self._find_def_by_label(label)

        # Compare against None explicitly: 0 is a valid old id and must not
        # be mistaken for "unknown".
        if old_id is not None:
            effective_old_id: int | None = old_id
        else:
            effective_old_id = self._find_old_id_by_label(label)
        if effective_old_id is not None:
            self.old_to_new_id[effective_old_id] = self.new_id

        self.refs[label_key] = self.new_id
        self.processed.add(label)
        self.new_id += 1
67+
68+
def _collect_refs_in_fences(tokens: list) -> list[str]:
    """Return footnote labels found in fence content, first occurrence first."""
    # A dict keeps insertion order and drops duplicates in one structure.
    ordered: dict[str, None] = {}
    for tok in tokens:
        if tok.type == "fence" and tok.content:
            for hit in _FOOTNOTE_REF_PATTERN.finditer(tok.content):
                ordered.setdefault(hit.group(1), None)
    return list(ordered)
81+
82+
83+
def _build_dependency_graph(tokens: list) -> dict[str, set[str]]:
    """Map each footnote definition label to the labels referenced inside it."""
    deps: dict[str, set[str]] = {}
    # Label of the definition currently being walked, if any.
    open_label: str | None = None

    for tok in tokens:
        kind = tok.type
        if kind == "footnote_reference_open":
            open_label = tok.meta.get("label")
            if open_label:
                deps.setdefault(open_label, set())
        elif kind == "footnote_reference_close":
            open_label = None
        elif open_label is not None:
            # Any other token inside a definition may carry nested refs.
            _collect_nested_refs(tok, deps[open_label])

    return deps
100+
101+
102+
def _collect_nested_refs(token, ref_set: set[str]) -> None:
    """Add to *ref_set* the label of every footnote ref at or below *token*."""
    pending = [token]
    while pending:
        node = pending.pop()
        if node.type == "footnote_ref" and node.meta:
            ref_set.add(node.meta["label"])
        # Push children reversed so they are visited in document order.
        pending.extend(reversed(node.children or []))
108+
109+
110+
def _categorize_footnotes(
    refs: dict,
    footnote_deps: dict[str, set[str]],
    refs_in_fences: list[str],
) -> _FootnoteCategories:
    """Sort every entry of *refs* into one of four reference categories.

    An entry with a non-negative id is body-referenced. Otherwise it is
    nested-only when another footnote references its label, fence-only when
    its label appears in *refs_in_fences*, and a true orphan when neither
    applies. The checks are applied in that order.
    """
    nested_targets: set[str] = set().union(*footnote_deps.values())
    fence_labels = set(refs_in_fences)

    in_body: list[tuple[int, str, str]] = []
    nested: set[str] = set()
    fence_hits: set[str] = set()
    orphans: list[str] = []

    for label_key, old_id in refs.items():
        # The refs key is the label preceded by a one-character prefix.
        label = label_key[1:]
        if old_id >= 0:
            in_body.append((old_id, label_key, label))
        elif label in nested_targets:
            nested.add(label)
        elif label in fence_labels:
            fence_hits.add(label)
        else:
            orphans.append(label_key)

    # Order body-referenced footnotes by their old id.
    in_body.sort(key=lambda entry: entry[0])
    # Keep fence-only labels in the order they were found in the fences.
    fence_ordered = [name for name in refs_in_fences if name in fence_hits]

    return _FootnoteCategories(
        body_referenced=in_body,
        nested_only=nested,
        fence_only=fence_ordered,
        true_orphans=orphans,
    )
147+
148+
149+
def _process_nested_for_parent(
    parent_label: str,
    footnote_deps: dict[str, set[str]],
    state: _ReorderState,
    skip_labels: set[str],
) -> None:
    """Add the footnotes referenced from inside *parent_label*'s definition.

    Args:
        parent_label: Label of the footnote whose nested references are added.
        footnote_deps: Mapping of definition label -> labels it references.
        state: Reorder state that receives the nested footnotes.
        skip_labels: Labels that must not be added here.
    """
    # The dependencies are stored as a set of strings, whose iteration order
    # varies between interpreter runs (hash randomization). Sort so the
    # resulting footnote numbering is reproducible.
    for nested_label in sorted(footnote_deps.get(parent_label, ())):
        if nested_label not in skip_labels:
            state.add_footnote(nested_label, f":{nested_label}")
159+
160+
161+
def _build_reordered_list(
    categories: _FootnoteCategories,
    footnote_deps: dict[str, set[str]],
    old_list: dict,
    refs: dict,
    keep_orphans: bool,
) -> _ReorderState:
    """Build the reordered footnote list from categorized footnotes.

    Footnotes are added in this order: each body-referenced footnote followed
    by the footnotes its definition references, then nested-only footnotes,
    then fence-only footnotes, and finally (only when *keep_orphans* is true)
    the true orphans.

    Args:
        categories: Result of categorizing the footnotes.
        footnote_deps: Mapping of definition label -> labels it references.
        old_list: Original footnote entries keyed by old id.
        refs: Mapping of refs key -> id; updated in place with new ids.
        keep_orphans: Whether true orphans are appended to the list.

    Returns:
        The populated reorder state.
    """
    state = _ReorderState(old_list=old_list, refs=refs)
    # ``true_orphans`` holds refs keys (prefix + label); strip the prefix so
    # the skip set compares labels with labels.
    orphan_labels = {orphan_key[1:] for orphan_key in categories.true_orphans}
    skip_labels = categories.body_labels | orphan_labels

    for old_id, label_key, label in categories.body_referenced:
        state.add_footnote(label, label_key, old_id)
        _process_nested_for_parent(label, footnote_deps, state, skip_labels)

    # ``nested_only`` is a set of strings; sort it so the assigned ids do not
    # depend on per-run hash randomization.
    for nested_label in sorted(categories.nested_only):
        state.add_footnote(nested_label, f":{nested_label}")

    for fence_label in categories.fence_only:
        state.add_footnote(fence_label, f":{fence_label}")

    if keep_orphans:
        for orphan_key in categories.true_orphans:
            state.add_footnote(orphan_key[1:], orphan_key)

    return state
187+
188+
189+
def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None:
    """Rewrite footnote ids on *tokens* and all their descendants in place."""
    for tok in tokens:
        if tok.type in ("footnote_ref", "footnote_anchor") and tok.meta:
            current = tok.meta.get("id")
            # Ids without a mapping are left untouched.
            if current in old_to_new_id:
                tok.meta["id"] = old_to_new_id[current]
        _update_token_ids(tok.children or [], old_to_new_id)
197+
198+
199+
def _partition_refs_by_context(tokens: list) -> tuple[list, dict[str, list]]:
    """Split footnote refs into those in the body and those inside definitions.

    Returns a pair: the refs found outside any footnote definition, and a
    mapping of definition label -> refs found inside that definition.
    """
    in_body: list = []
    per_definition: dict[str, list] = {}
    # Label of the definition currently being walked, if any.
    active: str | None = None

    for tok in tokens:
        kind = tok.type
        if kind == "footnote_reference_open":
            active = tok.meta.get("label")
            if active:
                per_definition.setdefault(active, [])
        elif kind == "footnote_reference_close":
            active = None
        else:
            if active is None:
                bucket = in_body
            else:
                bucket = per_definition.setdefault(active, [])
            _collect_refs(tok, bucket)

    return in_body, per_definition
219+
220+
221+
def _assign_subids_to_refs(ref_tokens: list, counters: dict[int, int]) -> None:
    """Give each ref token the next subId for its footnote id.

    *counters* maps footnote id -> number of refs numbered so far and is
    updated in place, so numbering continues across successive calls.
    """
    for tok in ref_tokens:
        meta = tok.meta
        footnote_id = meta["id"]
        already_seen = counters.get(footnote_id, 0)
        meta["subId"] = already_seen
        counters[footnote_id] = already_seen + 1
227+
228+
229+
def _reassign_subids(tokens: list, refs: dict, footnote_list: dict) -> None:
    """Renumber subIds in output order and refresh per-footnote ref counts.

    Body refs are numbered first. Refs inside definitions follow, taking the
    definitions in the iteration order of *refs*.
    """
    body_refs, def_refs = _partition_refs_by_context(tokens)
    counters: dict[int, int] = {}

    _assign_subids_to_refs(body_refs, counters)

    definition_labels = (label_key[1:] for label_key in refs)
    for label in definition_labels:
        if label in def_refs:
            _assign_subids_to_refs(def_refs[label], counters)

    # Only footnotes present in the list get their count rewritten.
    for fn_id in counters:
        if fn_id in footnote_list:
            footnote_list[fn_id]["count"] = counters[fn_id]
244+
245+
246+
def _collect_refs(token, ref_list: list) -> None:
    """Append to *ref_list* every footnote ref token at or below *token*."""
    pending = [token]
    while pending:
        node = pending.pop()
        if node.type == "footnote_ref" and node.meta:
            ref_list.append(node)
        # Reversed push keeps the pre-order, left-to-right visiting sequence.
        pending.extend(reversed(node.children or []))
252+
253+
254+
def _get_footnote_data(state: StateCore) -> tuple[dict, dict] | None:
    """Return ``(refs, list)`` from ``state.env["footnotes"]``.

    ``None`` is returned when there are no refs. A missing list is reported
    as an empty dict.
    """
    footnotes = state.env.get("footnotes", {})
    known_refs = footnotes.get("refs", {})
    if known_refs:
        return known_refs, footnotes.get("list", {})
    return None
261+
262+
263+
def reorder_footnotes_by_definition(
    state: StateCore, keep_orphans: bool = False
) -> None:
    """Rebuild the footnote list in its new order and fix ids and subIds.

    Does nothing when the state holds no footnote refs. Unless *keep_orphans*
    is true, footnotes categorized as true orphans are deleted from the refs
    mapping before the list is rebuilt.
    """
    data = _get_footnote_data(state)
    if data is None:
        return
    refs, old_list = data

    tokens = state.tokens
    footnote_deps = _build_dependency_graph(tokens)
    categories = _categorize_footnotes(
        refs, footnote_deps, _collect_refs_in_fences(tokens)
    )

    if not keep_orphans:
        for orphan_key in categories.true_orphans:
            del refs[orphan_key]

    rebuilt = _build_reordered_list(
        categories, footnote_deps, old_list, refs, keep_orphans
    )

    # Publish the new list first, then bring the tokens in line with it.
    state.env["footnotes"]["list"] = rebuilt.new_list
    _update_token_ids(tokens, rebuilt.old_to_new_id)
    _reassign_subids(tokens, refs, rebuilt.new_list)

mdformat_footnote/plugin.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,52 @@
11
from __future__ import annotations
22

3+
import argparse
34
from collections.abc import Mapping
5+
from functools import partial
46
import textwrap
57

68
from markdown_it import MarkdownIt
79
from mdformat.renderer import RenderContext, RenderTreeNode
810
from mdformat.renderer.typing import Render
911
from mdit_py_plugins.footnote import footnote_plugin
1012

13+
from ._helpers import ContextOptions, get_conf
14+
from ._reorder import reorder_footnotes_by_definition
15+
16+
17+
def _keep_orphans(options: ContextOptions) -> bool:
    """Return True when the ``keep_orphans`` setting is truthy."""
    setting = get_conf(options, "keep_orphans")
    return bool(setting)
20+
21+
22+
def add_cli_argument_group(group: argparse._ArgumentGroup) -> None:
    """Register this plugin's command-line options on *group*.

    Stored in `mdit.options["mdformat"]["plugin"]["footnote"]`
    """
    help_text = (
        "Keep footnote definitions that are never referenced "
        "(default: remove them)"
    )
    # store_const (rather than store_true) leaves the value as None when the
    # flag is not given.
    group.add_argument(
        "--keep-footnote-orphans",
        dest="keep_orphans",
        action="store_const",
        const=True,
        help=help_text,
    )
37+
1138

1239
def update_mdit(mdit: MarkdownIt) -> None:
    """Enable footnote parsing on *mdit* and register the reordering rule."""
    mdit.use(footnote_plugin)
    # Inline footnotes have no rendering support here yet, so that rule
    # stays disabled for now.
    mdit.disable("footnote_inline")
    # The reorder rule fixes footnote order and ids and handles orphans; it
    # must be placed ahead of "footnote_tail".
    mdit.core.ruler.before(
        "footnote_tail",
        "reorder_footnotes",
        partial(
            reorder_footnotes_by_definition,
            keep_orphans=_keep_orphans(mdit.options),
        ),
    )
1850

1951

2052
def _footnote_ref_renderer(node: RenderTreeNode, context: RenderContext) -> str:

tests/fixture_helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Helper utilities for loading test fixtures."""
2+
3+
from pathlib import Path
4+
5+
from markdown_it.utils import read_fixture_file
6+
7+
8+
def load_fixtures(filename: str) -> list[tuple[int, str, str, str]]:
    """Read the fixture file *filename* from the ``fixtures`` directory.

    The directory is resolved relative to this module's own location.
    """
    fixtures_dir = Path(__file__).parent / "fixtures"
    return read_fixture_file(fixtures_dir / filename)
12+
13+
14+
def get_fixture(filename: str, title: str) -> tuple[str, str]:
    """Return ``(input_text, expected_output)`` of the fixture named *title*.

    The first fixture in *filename* whose title matches is used.

    Raises:
        ValueError: If no fixture has that title; the message lists the
            titles that are available.
    """
    fixtures = load_fixtures(filename)
    matching = (
        (input_text, expected_output)
        for _, fixture_title, input_text, expected_output in fixtures
        if fixture_title == title
    )
    found = next(matching, None)
    if found is not None:
        return found

    available = [entry[1] for entry in fixtures]
    raise ValueError(
        f"Fixture '{title}' not found in {filename}. Available: {available}"
    )

0 commit comments

Comments
 (0)