|
32 | 32 | # has unclosed constructs). |
33 | 33 |
|
34 | 34 |
|
| 35 | +def _strip_fenced_code(text: str) -> str: |
| 36 | + """Return *text* with content between code fences replaced by empty lines. |
| 37 | +
|
| 38 | + This allows inline-marker counting to ignore literal characters inside |
| 39 | + fenced code blocks (e.g. ``*`` inside a code block is not an unclosed |
| 40 | + italic marker). |
| 41 | + """ |
| 42 | + lines = text.split("\n") |
| 43 | + result_lines: list[str] = [] |
| 44 | + in_fence = False |
| 45 | + for line in lines: |
| 46 | + stripped = line.lstrip() |
| 47 | + if stripped.startswith("```") or stripped.startswith("~~~"): |
| 48 | + in_fence = not in_fence |
| 49 | + result_lines.append("") # replace fence line itself |
| 50 | + elif in_fence: |
| 51 | + result_lines.append("") # replace content inside fence |
| 52 | + else: |
| 53 | + result_lines.append(line) |
| 54 | + return "\n".join(result_lines) |
| 55 | + |
| 56 | + |
35 | 57 | def _remend(text: str) -> str: |
36 | | - """Close unclosed inline markdown constructs. |
| 58 | + """Repair incomplete markdown by closing unclosed inline markers. |
37 | 59 |
|
38 | 60 | This is a simplified Python equivalent of the ``remend`` npm package. |
39 | | - It scans for unclosed ``**``, ``*``, ``~~``, `` ` ``, and ``[`` and |
40 | | - appends the matching closers. |
| 61 | + Fixes issues in the previous implementation: |
| 62 | + - Dead code around ``star_count2`` (removed) |
| 63 | + - ``~~`` counting confused by ``~~~`` code fences (handled by stripping) |
| 64 | + - Missing ``__`` / ``_`` (underscore bold/italic) handling (added) |
| 65 | + - Markers inside code blocks no longer counted as inline markers |
| 66 | +
|
| 67 | + Strategy: count *unescaped* characters for ``*`` and ``_`` (parity-based) |
| 68 | + outside code fences and code spans. Count ``~~`` substrings for |
| 69 | + strikethrough. Count backtick characters for inline code. |
41 | 70 | """ |
42 | 71 | result = text |
43 | 72 |
|
44 | | - # --- code spans (backtick) --- |
45 | | - # Simple heuristic: if the total number of backtick characters is odd, |
46 | | - # there must be an unclosed code span -- close it with one backtick. |
47 | | - # This is idempotent: after closing, the count becomes even and no |
48 | | - # further modification is needed. |
49 | | - if result.count("`") % 2 != 0: |
| 73 | + # --- code fences --- |
| 74 | + # If inside an unclosed code fence, close it and return immediately. |
| 75 | + in_code_fence = False |
| 76 | + for line in result.split("\n"): |
| 77 | + stripped = line.lstrip() |
| 78 | + if stripped.startswith("```") or stripped.startswith("~~~"): |
| 79 | + in_code_fence = not in_code_fence |
| 80 | + |
| 81 | + if in_code_fence: |
| 82 | + result += "\n```" |
| 83 | + return result |
| 84 | + |
| 85 | + # Strip fenced code blocks so their contents don't affect inline counts. |
| 86 | + outside_fences = _strip_fenced_code(result) |
| 87 | + |
| 88 | + # --- inline code backticks --- |
| 89 | + # Count total backtick characters outside code fences. If odd, one code |
| 90 | + # span is unclosed -- append a single backtick. |
| 91 | + backtick_count = outside_fences.count("`") |
| 92 | + if backtick_count % 2 != 0: |
50 | 93 | result += "`" |
51 | 94 |
|
52 | | - # --- bold / italic --- |
53 | | - # Count unescaped * sequences |
| 95 | + # --- bold / italic (* based) --- |
| 96 | + # Count total unescaped * characters outside code fences. If odd, append |
| 97 | + # one to make even. This is idempotent: once the count is even, no |
| 98 | + # further change occurs. |
54 | 99 | star_count = 0 |
55 | 100 | j = 0 |
56 | | - temp = result |
57 | | - while j < len(temp): |
58 | | - if temp[j] == "\\": |
| 101 | + while j < len(outside_fences): |
| 102 | + if outside_fences[j] == "\\": |
59 | 103 | j += 2 |
60 | 104 | continue |
61 | | - if temp[j] == "*": |
62 | | - run = 0 |
63 | | - while j < len(temp) and temp[j] == "*": |
64 | | - run += 1 |
65 | | - j += 1 |
66 | | - star_count += run |
67 | | - continue |
| 105 | + if outside_fences[j] == "*": |
| 106 | + star_count += 1 |
68 | 107 | j += 1 |
69 | 108 |
|
70 | 109 | if star_count % 2 != 0: |
71 | 110 | result += "*" |
72 | | - # After fixing single, check for double |
73 | | - star_count2 = 0 |
74 | | - k = 0 |
75 | | - temp2 = result |
76 | | - while k < len(temp2): |
77 | | - if temp2[k] == "\\": |
78 | | - k += 2 |
79 | | - continue |
80 | | - if temp2[k] == "*": |
81 | | - run = 0 |
82 | | - while k < len(temp2) and temp2[k] == "*": |
83 | | - run += 1 |
84 | | - k += 1 |
85 | | - star_count2 += run |
| 111 | + |
| 112 | + # --- bold / italic (_ based) --- |
| 113 | + # Same parity approach for underscore markers. |
| 114 | + under_count = 0 |
| 115 | + j = 0 |
| 116 | + while j < len(outside_fences): |
| 117 | + if outside_fences[j] == "\\": |
| 118 | + j += 2 |
86 | 119 | continue |
87 | | - k += 1 |
| 120 | + if outside_fences[j] == "_": |
| 121 | + under_count += 1 |
| 122 | + j += 1 |
| 123 | + |
| 124 | + if under_count % 2 != 0: |
| 125 | + result += "_" |
88 | 126 |
|
89 | | - # --- strikethrough ~~ --- |
90 | | - tilde_pairs = result.count("~~") |
| 127 | + # --- strikethrough ~~ --- |
| 128 | + # Count non-overlapping ``~~`` substrings outside code fences. If odd, |
| 129 | + # one strikethrough is unclosed -- append ``~~``. |
| 130 | + tilde_pairs = outside_fences.count("~~") |
91 | 131 | if tilde_pairs % 2 != 0: |
92 | 132 | result += "~~" |
93 | 133 |
|
94 | 134 | # --- links [text](url) --- |
95 | 135 | open_brackets = 0 |
96 | 136 | m = 0 |
97 | | - while m < len(result): |
98 | | - if result[m] == "\\": |
| 137 | + while m < len(outside_fences): |
| 138 | + if outside_fences[m] == "\\": |
99 | 139 | m += 2 |
100 | 140 | continue |
101 | | - if result[m] == "[": |
| 141 | + if outside_fences[m] == "[": |
102 | 142 | open_brackets += 1 |
103 | | - elif result[m] == "]": |
| 143 | + elif outside_fences[m] == "]": |
104 | 144 | open_brackets -= 1 |
105 | 145 | m += 1 |
106 | 146 | if open_brackets > 0: |
|
0 commit comments