|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | 5 | from collections.abc import Callable |
| 6 | +import functools |
| 7 | +import re |
6 | 8 | from typing import TYPE_CHECKING |
7 | 9 |
|
8 | 10 | from . import rules_inline |
|
15 | 17 | from markdown_it import MarkdownIt |
16 | 18 |
|
17 | 19 |
|
| 20 | +# Default set of characters that terminate a text token and allow inline rules to fire. |
| 21 | +# '{}$%@~+=:' reserved for extensions. |
| 22 | +# Note: Don't confuse with "Markdown ASCII Punctuation" chars. |
| 23 | +# http://spec.commonmark.org/0.15/#ascii-punctuation-character |
# Each character of the literal below becomes one member of the set; the
# backslash is the only member that has to be escaped in the string itself.
_DEFAULT_TERMINATORS: frozenset[str] = frozenset("\n!#$%&*+-:<=>@[\\]^_`{}~")
| 51 | + |
| 52 | + |
| 53 | +# Lazily compiled regex for the default terminator set. The @cache ensures it is |
| 54 | +# compiled at most once (on first ParserInline instantiation) and shared across all |
| 55 | +# instances that have not added extra chars, keeping __init__ cost near zero. |
@functools.cache
def _default_terminator_re() -> re.Pattern[str]:
    """Return the compiled character-class regex for ``_DEFAULT_TERMINATORS``.

    ``functools.cache`` memoises this zero-argument call, so the pattern is
    built on the first call and the same object is returned afterwards.
    """
    escaped_members = re.escape("".join(_DEFAULT_TERMINATORS))
    return re.compile(f"[{escaped_members}]")
| 59 | + |
| 60 | + |
18 | 61 | # Parser rules |
19 | 62 | RuleFuncInlineType = Callable[[StateInline, bool], bool] |
20 | 63 | """(state: StateInline, silent: bool) -> matched: bool
@@ -61,6 +104,30 @@ def __init__(self) -> None: |
61 | 104 | self.ruler2 = Ruler[RuleFuncInline2Type]() |
62 | 105 | for name, rule2 in _rules2: |
63 | 106 | self.ruler2.push(name, rule2) |
| 107 | + # Characters that stop the text rule, allowing other inline rules to fire. |
| 108 | + # _extra_terminator_chars starts empty and is only populated when add_terminator_char() |
| 109 | + # is called with a char outside the defaults, so __init__ only creates an empty set. |
| 110 | + self._extra_terminator_chars: set[str] = set() |
| 111 | + # Pre-compiled regex shared with all default instances (no copy in the common path). |
| 112 | + self.terminator_re: re.Pattern[str] = _default_terminator_re() |
| 113 | + |
def add_terminator_char(self, ch: str) -> None:
    """Register a character that stops the ``text`` rule, allowing inline rules to fire.

    This lets plugins declare which characters their inline rules react to,
    mirroring the ``MARKER`` mechanism in the Rust markdown-it implementation.

    Characters that are already terminators (either defaults or previously
    registered) are ignored, so repeated calls do not recompile the regex.
    If ``ch`` holds several characters, each one is registered individually;
    an empty string is a no-op.

    :param ch: A single character to add to the terminator set.
    :raises TypeError: If ``ch`` is not a string. The check runs before any
        state is touched, so a bad argument cannot leave the parser with an
        unusable terminator set.
    """
    if not isinstance(ch, str):
        raise TypeError(
            f"terminator character must be a str, not {type(ch).__name__}"
        )

    # Work per character so that "ab" or "" never ends up stored as a single
    # set member, which would defeat the membership checks on later calls.
    new_chars = set(ch) - _DEFAULT_TERMINATORS - self._extra_terminator_chars
    if not new_chars:
        return

    self._extra_terminator_chars.update(new_chars)
    # Rebinding (rather than mutating) leaves the shared default pattern
    # untouched for other instances. Sorting makes the pattern text
    # independent of set iteration order; inside a character class the order
    # of members does not affect what is matched.
    members = sorted(_DEFAULT_TERMINATORS | self._extra_terminator_chars)
    self.terminator_re = re.compile("[" + re.escape("".join(members)) + "]")
64 | 131 |
|
65 | 132 | def skipToken(self, state: StateInline) -> None: |
66 | 133 | """Skip single token by running all rules in validation mode; |
|
0 commit comments