|
| 1 | +"""Footnote ID and subId normalization logic.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from dataclasses import dataclass, field |
| 6 | +import re |
| 7 | + |
| 8 | +from markdown_it.rules_core import StateCore |
| 9 | + |
# Matches a footnote reference written as ``[^label]``; group 1 captures the
# label, i.e. one or more characters other than ``]``.
_FOOTNOTE_REF_PATTERN = re.compile(r"\[\^([^\]]+)\]")
| 11 | + |
| 12 | + |
| 13 | +@dataclass |
| 14 | +class _FootnoteCategories: |
| 15 | + """Categorized footnotes for reordering.""" |
| 16 | + |
| 17 | + body_referenced: list[tuple[int, str, str]] |
| 18 | + nested_only: set[str] |
| 19 | + fence_only: list[str] |
| 20 | + true_orphans: list[str] |
| 21 | + |
| 22 | + @property |
| 23 | + def body_labels(self) -> set[str]: |
| 24 | + return {label for _, _, label in self.body_referenced} |
| 25 | + |
| 26 | + |
| 27 | +@dataclass |
| 28 | +class _ReorderState: |
| 29 | + """Mutable state for footnote reordering.""" |
| 30 | + |
| 31 | + old_list: dict |
| 32 | + refs: dict |
| 33 | + new_list: dict = field(default_factory=dict) |
| 34 | + old_to_new_id: dict[int, int] = field(default_factory=dict) |
| 35 | + processed: set[str] = field(default_factory=set) |
| 36 | + new_id: int = 0 |
| 37 | + |
| 38 | + def _find_def_by_label(self, label: str) -> dict: |
| 39 | + for fn_data in self.old_list.values(): |
| 40 | + if fn_data.get("label") == label: |
| 41 | + return fn_data.copy() |
| 42 | + return {"label": label, "count": 0} |
| 43 | + |
| 44 | + def _find_old_id_by_label(self, label: str) -> int | None: |
| 45 | + for old_id, fn_data in self.old_list.items(): |
| 46 | + if fn_data.get("label") == label: |
| 47 | + return old_id |
| 48 | + return None |
| 49 | + |
| 50 | + def add_footnote( |
| 51 | + self, label: str, label_key: str, old_id: int | None = None |
| 52 | + ) -> None: |
| 53 | + """Add a footnote to the new list and update mappings.""" |
| 54 | + if label in self.processed: |
| 55 | + return |
| 56 | + |
| 57 | + self.new_list[self.new_id] = self._find_def_by_label(label) |
| 58 | + |
| 59 | + effective_old_id = old_id or self._find_old_id_by_label(label) |
| 60 | + if effective_old_id is not None: |
| 61 | + self.old_to_new_id[effective_old_id] = self.new_id |
| 62 | + |
| 63 | + self.refs[label_key] = self.new_id |
| 64 | + self.processed.add(label) |
| 65 | + self.new_id += 1 |
| 66 | + |
| 67 | + |
def _collect_refs_in_fences(tokens: list) -> list[str]:
    """Return the footnote labels mentioned inside fence tokens.

    Only tokens of type ``"fence"`` with non-empty content are scanned. Each
    label is reported once, in the order of its first appearance.
    """
    # Dict keys keep first-insertion order, which gives ordered de-duplication.
    first_seen: dict[str, None] = {}
    for tok in tokens:
        if tok.type == "fence" and tok.content:
            for hit in _FOOTNOTE_REF_PATTERN.finditer(tok.content):
                first_seen.setdefault(hit.group(1), None)
    return list(first_seen)
| 81 | + |
| 82 | + |
def _build_dependency_graph(tokens: list) -> dict[str, set[str]]:
    """Map each footnote definition label to the labels referenced inside it.

    Tokens between a ``footnote_reference_open`` and the matching
    ``footnote_reference_close`` are attributed to the label found in the
    opening token's ``meta``.
    """
    deps: dict[str, set[str]] = {}
    open_label: str | None = None

    for tok in tokens:
        kind = tok.type
        if kind == "footnote_reference_open":
            open_label = tok.meta.get("label")
            if open_label:
                deps.setdefault(open_label, set())
        elif kind == "footnote_reference_close":
            open_label = None
        elif open_label is not None:
            # Inside a definition: record every footnote it refers to.
            _collect_nested_refs(tok, deps[open_label])

    return deps
| 100 | + |
| 101 | + |
| 102 | +def _collect_nested_refs(token, ref_set: set[str]) -> None: |
| 103 | + """Collect footnote labels referenced from a token and its children.""" |
| 104 | + if token.type == "footnote_ref" and token.meta: |
| 105 | + ref_set.add(token.meta["label"]) |
| 106 | + for child in token.children or []: |
| 107 | + _collect_nested_refs(child, ref_set) |
| 108 | + |
| 109 | + |
def _categorize_footnotes(
    refs: dict,
    footnote_deps: dict[str, set[str]],
    refs_in_fences: list[str],
) -> _FootnoteCategories:
    """Sort every entry of *refs* into one of four categories.

    Each ``label_key`` has its first character stripped to obtain the bare
    label. The first matching rule wins:

    1. id ``>= 0``: body-referenced, later ordered by id;
    2. label referenced from another footnote: nested-only;
    3. label found in a fence: fence-only, kept in fence order;
    4. otherwise: true orphan, stored by its label key.
    """
    nested_targets: set[str] = set().union(*footnote_deps.values())
    fenced = set(refs_in_fences)

    in_body: list[tuple[int, str, str]] = []
    only_nested: set[str] = set()
    only_fenced: set[str] = set()
    orphans: list[str] = []

    for label_key, old_id in refs.items():
        label = label_key[1:]
        if old_id >= 0:
            in_body.append((old_id, label_key, label))
        elif label in nested_targets:
            only_nested.add(label)
        elif label in fenced:
            only_fenced.add(label)
        else:
            orphans.append(label_key)

    # Stable sort: entries that share an id keep their ``refs`` order.
    in_body = sorted(in_body, key=lambda entry: entry[0])
    # Report fence-only labels in the order they appear in the fences.
    fence_ordered = [name for name in refs_in_fences if name in only_fenced]

    return _FootnoteCategories(in_body, only_nested, fence_ordered, orphans)
| 147 | + |
| 148 | + |
| 149 | +def _process_nested_for_parent( |
| 150 | + parent_label: str, |
| 151 | + footnote_deps: dict[str, set[str]], |
| 152 | + state: _ReorderState, |
| 153 | + skip_labels: set[str], |
| 154 | +) -> None: |
| 155 | + """Process nested footnotes referenced by a parent footnote.""" |
| 156 | + for nested_label in footnote_deps.get(parent_label, []): |
| 157 | + if nested_label not in skip_labels: |
| 158 | + state.add_footnote(nested_label, f":{nested_label}") |
| 159 | + |
| 160 | + |
def _build_reordered_list(
    categories: _FootnoteCategories,
    footnote_deps: dict[str, set[str]],
    old_list: dict,
    refs: dict,
    keep_orphans: bool,
) -> _ReorderState:
    """Build the reordered footnote list from categorized footnotes.

    Footnotes are added in this order: each body-referenced footnote followed
    by the footnotes nested in it, then the remaining nested-only footnotes,
    then fence-only footnotes, and finally (only when *keep_orphans* is true)
    the true orphans.

    Args:
        categories: Output of the categorisation step.
        footnote_deps: Definition label -> labels referenced inside it.
        old_list: Original footnote list keyed by old id.
        refs: Label key -> id mapping; updated in place with the new ids.
        keep_orphans: Whether unreferenced footnotes are kept.

    Returns:
        The populated ``_ReorderState``.
    """
    state = _ReorderState(old_list=old_list, refs=refs)
    # ``true_orphans`` holds label *keys*; drop the one-character prefix so the
    # entries are comparable with the bare labels tested during nested
    # processing.
    orphan_labels = {orphan_key[1:] for orphan_key in categories.true_orphans}
    skip_labels = categories.body_labels | orphan_labels

    for old_id, label_key, label in categories.body_referenced:
        state.add_footnote(label, label_key, old_id)
        _process_nested_for_parent(label, footnote_deps, state, skip_labels)

    # ``nested_only`` is a set; sort it so the assigned ids do not depend on
    # hash randomisation.
    for nested_label in sorted(categories.nested_only):
        state.add_footnote(nested_label, f":{nested_label}")

    for fence_label in categories.fence_only:
        state.add_footnote(fence_label, f":{fence_label}")

    if keep_orphans:
        for orphan_key in categories.true_orphans:
            state.add_footnote(orphan_key[1:], orphan_key)

    return state
| 187 | + |
| 188 | + |
| 189 | +def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None: |
| 190 | + """Recursively update footnote IDs in tokens.""" |
| 191 | + for token in tokens: |
| 192 | + if token.type in ("footnote_ref", "footnote_anchor"): |
| 193 | + if token.meta and (old_id := token.meta.get("id")) in old_to_new_id: |
| 194 | + token.meta["id"] = old_to_new_id[old_id] |
| 195 | + for child in token.children or []: |
| 196 | + _update_token_ids([child], old_to_new_id) |
| 197 | + |
| 198 | + |
def _partition_refs_by_context(tokens: list) -> tuple[list, dict[str, list]]:
    """Split footnote ref tokens by where they occur.

    Returns:
        A pair ``(body_refs, def_refs)``: the refs found outside any footnote
        definition, and a mapping from definition label to the refs found
        between that definition's open and close tokens.
    """
    in_body: list = []
    by_definition: dict[str, list] = {}
    active_label: str | None = None

    for tok in tokens:
        if tok.type == "footnote_reference_open":
            active_label = tok.meta.get("label")
            if active_label:
                by_definition.setdefault(active_label, [])
            continue
        if tok.type == "footnote_reference_close":
            active_label = None
            continue

        # Any other token: collect its refs into the bucket for the current context.
        if active_label is None:
            bucket = in_body
        else:
            bucket = by_definition.setdefault(active_label, [])
        _collect_refs(tok, bucket)

    return in_body, by_definition
| 219 | + |
| 220 | + |
| 221 | +def _assign_subids_to_refs(ref_tokens: list, counters: dict[int, int]) -> None: |
| 222 | + """Assign sequential subIds to a list of ref tokens.""" |
| 223 | + for ref_token in ref_tokens: |
| 224 | + fn_id = ref_token.meta["id"] |
| 225 | + ref_token.meta["subId"] = counters.get(fn_id, 0) |
| 226 | + counters[fn_id] = counters.get(fn_id, 0) + 1 |
| 227 | + |
| 228 | + |
def _reassign_subids(tokens: list, refs: dict, footnote_list: dict) -> None:
    """Renumber subIds in output order and refresh the per-footnote counts.

    Refs in the body are numbered first. Refs inside footnote definitions
    follow, visiting definitions in the iteration order of *refs*. Finally,
    every footnote id that received at least one ref and is present in
    *footnote_list* gets its ``"count"`` set to the number of refs numbered.
    """
    body_refs, def_refs = _partition_refs_by_context(tokens)
    next_subid: dict[int, int] = {}

    _assign_subids_to_refs(body_refs, next_subid)

    for label_key in refs:
        # Strip the one-character prefix to get the bare definition label.
        nested = def_refs.get(label_key[1:])
        if nested is not None:
            _assign_subids_to_refs(nested, next_subid)

    for fn_id, total in next_subid.items():
        if fn_id in footnote_list:
            footnote_list[fn_id]["count"] = total
| 244 | + |
| 245 | + |
| 246 | +def _collect_refs(token, ref_list: list) -> None: |
| 247 | + """Collect footnote_ref tokens from a token and its children.""" |
| 248 | + if token.type == "footnote_ref" and token.meta: |
| 249 | + ref_list.append(token) |
| 250 | + for child in token.children or []: |
| 251 | + _collect_refs(child, ref_list) |
| 252 | + |
| 253 | + |
| 254 | +def _get_footnote_data(state: StateCore) -> tuple[dict, dict] | None: |
| 255 | + """Extract footnote refs and list from state, or None if missing.""" |
| 256 | + footnote_data = state.env.get("footnotes", {}) |
| 257 | + refs = footnote_data.get("refs", {}) |
| 258 | + if not refs: |
| 259 | + return None |
| 260 | + return refs, footnote_data.get("list", {}) |
| 261 | + |
| 262 | + |
def reorder_footnotes_by_definition(
    state: StateCore, keep_orphans: bool = False
) -> None:
    """Renumber the footnotes recorded in *state* and fix up the tokens.

    Does nothing when the state carries no footnote refs. Otherwise the
    footnotes are categorized, a reordered list replaces
    ``state.env["footnotes"]["list"]``, footnote ids stored in the tokens are
    remapped, and subIds are reassigned.

    Args:
        state: Core state whose ``env`` and ``tokens`` are updated in place.
        keep_orphans: When false (the default), footnotes categorized as true
            orphans are removed from the refs mapping and left out of the new
            list; when true they are appended at the end.
    """
    footnote_data = _get_footnote_data(state)
    if footnote_data is None:
        return
    refs, old_list = footnote_data

    tokens = state.tokens
    deps = _build_dependency_graph(tokens)
    fenced_labels = _collect_refs_in_fences(tokens)
    categories = _categorize_footnotes(refs, deps, fenced_labels)

    if not keep_orphans:
        # Drop orphans before rebuilding so later passes over ``refs`` skip them.
        for orphan_key in categories.true_orphans:
            refs.pop(orphan_key)

    rebuilt = _build_reordered_list(categories, deps, old_list, refs, keep_orphans)

    state.env["footnotes"]["list"] = rebuilt.new_list
    _update_token_ids(tokens, rebuilt.old_to_new_id)
    _reassign_subids(tokens, refs, rebuilt.new_list)
0 commit comments