Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit f3db4bf

Browse files
committed
wip
1 parent dd3e090 commit f3db4bf

File tree

1 file changed

+34
-98
lines changed

1 file changed

+34
-98
lines changed

gapic/utils/rst.py

Lines changed: 34 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -13,148 +13,84 @@
1313
# limitations under the License.
1414

1515
import re
16-
from typing import Optional, List, Dict
16+
from typing import Optional, Dict
1717

1818
import pypandoc # type: ignore
1919

2020
from gapic.utils.lines import wrap
2121

22-
# --- PERFORMANCE CACHE ---
22+
# Cache for the few complex items we actually send to pandoc
2323
_RAW_RST_CACHE: Dict[str, str] = {}
2424

25-
26-
def _aggressive_fast_convert(text: str) -> Optional[str]:
25+
def _tuned_fast_convert(text: str) -> Optional[str]:
2726
"""
28-
Converts common Markdown (Code, Links, Lists) to RST using pure Python.
29-
Only gives up (returns None) for complex structures like Tables.
27+
Converts Markdown to RST using pure Python.
28+
Only falls back to Pandoc for Tables and Images.
3029
"""
31-
# 1. TABLE CHECK (The only thing we strictly need Pandoc for)
32-
# If we see a pipe surrounded by spaces, it's likely a table.
33-
if re.search(r" \| ", text) or re.search(r"\|\n", text):
30+
# --- 1. FALLBACKS ---
31+
# Tables (pipe surrounded by spaces) or Images (![).
32+
# We allow "][" (Reference Links) to be handled by Python now.
33+
if (re.search(r" \| ", text) or re.search(r"\|\n", text)) or "![" in text:
3434
return None
3535

36-
# 2. CODE BLOCKS: `code` -> ``code``
37-
# RST requires double backticks. Markdown uses one.
38-
# We look for backticks that aren't already double.
39-
# Regex: Negative lookbehind/lookahead to ensure we don't match ``already rst``.
40-
converted = re.sub(r"(?<!`)`([^`]+)`(?!`)", r"``\1``", text)
36+
# --- 2. CONVERSION ---
37+
38+
# A. CODE BLOCKS: `code` -> ``code``
39+
# CRITICAL: Run this FIRST. This ensures we handle existing backticks
40+
# before we create NEW backticks for links.
41+
# (?<!:) ensures we don't break Sphinx roles like :class:`MyClass`
42+
converted = re.sub(r"(?<!:|`)`([^`]+)`(?!`)", r"``\1``", text)
43+
44+
# B. REFERENCE LINKS: [Text][Ref] -> `Text <Ref>`__
45+
# We fix the broken documentation by converting these to valid RST links.
46+
# Since step A is done, these new backticks will NOT be doubled.
47+
converted = re.sub(r"\[([^\]]+)\]\[([^\]]+)\]", r"`\1 <\2>`__", converted)
4148

42-
# 3. LINKS: [Text](URL) -> `Text <URL>`__
43-
# We use anonymous links (__) to avoid collision issues.
49+
# C. STANDARD LINKS: [Text](URL) -> `Text <URL>`__
4450
converted = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"`\1 <\2>`__", converted)
4551

46-
# 4. BOLD: **text** -> **text** (Compatible, no change needed)
47-
48-
# 5. HEADINGS: # Heading -> Heading\n=======
49-
# (Simple fix for H1/H2, mostly sufficient for docstrings)
52+
# D. BOLD/ITALICS:
53+
converted = re.sub(r"(?<!_)\b_([^_]+)_\b(?!_)", r"*\1*", converted)
54+
55+
# E. HEADINGS: # Heading -> Heading\n=======
5056
converted = re.sub(r"^# (.*)$", r"\1\n" + "=" * 10, converted, flags=re.MULTILINE)
5157
converted = re.sub(r"^## (.*)$", r"\1\n" + "-" * 10, converted, flags=re.MULTILINE)
5258

53-
# 6. LISTS: Markdown lists (- item) work in RST mostly fine.
54-
# We just ensure there's a newline before a list starts to satisfy RST strictness.
55-
converted = re.sub(r"(\n[^-*].*)\n\s*[-*] ", r"\1\n\n- ", converted)
59+
# F. LISTS: Markdown (- item) needs a preceding newline for RST.
60+
converted = re.sub(r"(\n[^-*].*)\n\s*([-*] )", r"\1\n\n\2", converted)
5661

5762
return converted
5863

59-
60-
def batch_convert_docstrings(docstrings: List[str]):
61-
"""
62-
Optimized Batch Processor.
63-
1. Tries Aggressive Python Conversion first.
64-
2. Only sends Tables/Complex items to Pandoc.
65-
"""
66-
unique_docs = set(docstrings)
67-
68-
# Filter: Only keep strings that need conversion and aren't in cache
69-
candidates = [
70-
d for d in unique_docs
71-
if d
72-
and d not in _RAW_RST_CACHE
73-
and re.search(r"[|*`_[\]#]", d) # Only interesting chars
74-
]
75-
76-
if not candidates:
77-
return
78-
79-
pandoc_batch: List[str] = []
80-
81-
# 1. Try Python Conversion
82-
for doc in candidates:
83-
fast_result = _aggressive_fast_convert(doc)
84-
if fast_result is not None:
85-
# Success: Saved ~50ms per call
86-
_RAW_RST_CACHE[doc] = fast_result.strip()
87-
else:
88-
# Failed: Must use Pandoc (Tables, etc)
89-
pandoc_batch.append(doc)
90-
91-
# 2. Process Remainder with Pandoc (Likely < 10 items)
92-
if not pandoc_batch:
93-
return
94-
95-
separator = "\n\n__GAPIC_BATCH_SPLIT__\n\n"
96-
giant_payload = separator.join(pandoc_batch)
97-
98-
try:
99-
converted_payload = pypandoc.convert_text(
100-
giant_payload,
101-
"rst",
102-
format="commonmark",
103-
extra_args=["--columns=1000"]
104-
)
105-
except Exception:
106-
return
107-
108-
split_marker = "__GAPIC_BATCH_SPLIT__"
109-
results = converted_payload.split(split_marker)
110-
111-
if len(results) == len(pandoc_batch):
112-
for original, converted in zip(pandoc_batch, results):
113-
_RAW_RST_CACHE[original] = converted.strip()
114-
115-
11664
def rst(
11765
text: str,
11866
width: int = 72,
11967
indent: int = 0,
12068
nl: Optional[bool] = None,
12169
source_format: str = "commonmark",
12270
):
123-
"""Convert the given text to ReStructured Text."""
124-
12571
# 1. Super Fast Path: No special chars? Just wrap.
12672
if not re.search(r"[|*`_[\]#]", text):
127-
answer = wrap(
128-
text,
129-
indent=indent,
130-
offset=indent + 3,
131-
width=width - indent,
132-
)
73+
answer = wrap(text, indent=indent, offset=indent + 3, width=width - indent)
13374
return _finalize(answer, nl, indent)
13475

13576
# 2. Check Cache
13677
if text in _RAW_RST_CACHE:
13778
raw_rst = _RAW_RST_CACHE[text]
13879
else:
139-
# Slow Path: Missed by batch or new string.
140-
# TRY PYTHON CONVERT FIRST.
141-
# This prevents the 'Slow Path' from actually being slow.
142-
fast_result = _aggressive_fast_convert(text)
80+
# 3. Try Tuned Python Convert (Fastest)
81+
fast_result = _tuned_fast_convert(text)
14382

14483
if fast_result is not None:
14584
raw_rst = fast_result.strip()
14685
else:
147-
# The absolute last resort: Shell out to Pandoc
86+
# 4. Fallback to Pandoc (Only for Tables/Images)
14887
raw_rst = pypandoc.convert_text(
149-
text,
150-
"rst",
151-
format=source_format,
152-
extra_args=["--columns=1000"]
88+
text, "rst", format=source_format, extra_args=["--columns=1000"]
15389
).strip()
15490

15591
_RAW_RST_CACHE[text] = raw_rst
15692

157-
# 3. Python Formatting
93+
# 5. Python Formatting
15894
if "::" in raw_rst or ".. code" in raw_rst:
15995
answer = raw_rst.replace("\n", f"\n{' ' * indent}")
16096
else:

0 commit comments

Comments
 (0)