Skip to content

Commit 60b69d7

Browse files
authored
Merge pull request #83 from python-project-templates/tkp/wiki
More wiki tweaks
2 parents a2ce284 + e1ed67e commit 60b69d7

File tree

2 files changed

+215
-7
lines changed

2 files changed

+215
-7
lines changed

yardang/conf.py.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ html_js_files = [
129129
master_doc = "index"
130130
templates_path = ["_templates"]
131131
source_suffix = [".rst", ".md", *{{source_suffix}}]
132-
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "node_modules", "_skbuild", ".pytest_cache", "js/*", *{{exclude_patterns}}]
132+
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "node_modules", "_skbuild", ".pytest_cache", "js/*", "*.wiki", "*.wiki/*", "docs/wiki", "docs/wiki/*", *{{exclude_patterns}}]
133133
language = "{{language}}"
134134
pygments_style = "{{pygments_style}}"
135135

yardang/wiki.py

Lines changed: 214 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,56 @@ def cleanup_markdown(content: str) -> str:
147147
# Remove anchor tags before headings (e.g., <a id="overview"></a>)
148148
content = re.sub(r'<a id="[^"]+"></a>\s*\n?', "", content)
149149

150-
# Reduce image widths by 50% for wiki (GitHub wiki renders larger)
151-
def reduce_image_width(match):
152-
width = int(match.group(1))
153-
new_width = max(16, width // 2) # Reduce by 50%, minimum 16px
154-
return f'width="{new_width}"'
150+
# Badge URL patterns to skip (don't resize badges)
151+
badge_patterns = [
152+
r"shields\.io",
153+
r"badge\.svg",
154+
r"codecov\.io",
155+
r"github\.com/.+/actions/workflows/.+/badge",
156+
r"img\.shields\.io",
157+
r"coveralls\.io",
158+
r"travis-ci\.org",
159+
r"circleci\.com",
160+
r"appveyor\.com",
161+
r"readthedocs\.org",
162+
]
163+
164+
def is_badge_url(url):
165+
"""Check if URL looks like a badge image."""
166+
return any(re.search(pattern, url) for pattern in badge_patterns)
167+
168+
# Convert large images to HTML img tags with constrained width
169+
# Match: <a href="...">![alt](url)</a> pattern (linked images)
170+
def resize_linked_image(match):
171+
href = match.group(1)
172+
alt = match.group(2)
173+
src = match.group(3)
174+
# Skip badges - they should stay at natural size
175+
if is_badge_url(src):
176+
return match.group(0)
177+
return f'<a href="{href}"><img src="{src}" alt="{alt}" width="120"></a>'
178+
179+
content = re.sub(
180+
r'<a href="([^"]+)">\!\[([^\]]*)\]\(([^)]+)\)</a>',
181+
resize_linked_image,
182+
content,
183+
)
184+
185+
# Match: ![alt](url) pattern (standalone images, not inside links like [![]()])
186+
# Skip images that are inside markdown links (preceded by [)
187+
def resize_standalone_image(match):
188+
alt = match.group(1)
189+
src = match.group(2)
190+
# Skip badges - they should stay at natural size
191+
if is_badge_url(src):
192+
return match.group(0)
193+
return f'<img src="{src}" alt="{alt}" width="120">'
155194

156-
content = re.sub(r'width="(\d+)"', reduce_image_width, content)
195+
content = re.sub(
196+
r'(?<!["\(\[])\!\[([^\]]*)\]\(([^)]+)\)(?!["\)])',
197+
resize_standalone_image,
198+
content,
199+
)
157200

158201
# Fix collapsed div tags - add newlines after > and before <
159202
# Match: <div ...> content </div> and expand it
@@ -210,6 +253,168 @@ def fix_code_block(match):
210253
return content
211254

212255

256+
def cleanup_api_docs(content: str) -> str:
    """Clean up API documentation for better readability.

    Reformats dense sphinx-markdown-builder API output:
    - Removes escaped underscores between word characters
    - Breaks long function signatures into one parameter per line
    - Rewrites parameter bullets and section labels into a simpler layout
    - Turns ``#### NOTE``-style headings into bold blockquotes

    Args:
        content: Markdown content with API documentation.

    Returns:
        Cleaned API documentation content.
    """
    # Unescape ``word\_word``. Lookarounds are used instead of consuming the
    # neighbouring characters so that adjacent escapes such as ``a\_b\_c`` are
    # all rewritten in a single pass.
    content = re.sub(r"(?<=\w)\\_(?=\w)", "_", content)

    def split_top_level(params):
        """Split a parameter string on commas that sit outside any brackets."""
        pieces = []
        current = []
        depth = 0
        for char in params:
            if char == "," and depth == 0:
                pieces.append("".join(current).strip())
                current = []
                continue
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            current.append(char)
        pieces.append("".join(current).strip())
        # Drop empty fragments (e.g. produced by a trailing comma).
        return [piece for piece in pieces if piece]

    def format_signature(match):
        """Put each parameter of a long signature on its own line."""
        signature = match.group(0)

        # Short signatures are already readable; leave them alone.
        if len(signature) < 80:
            return signature

        param_list = split_top_level(match.group(2))

        # With only one or two parameters a line break does not help.
        if len(param_list) <= 2:
            return signature

        formatted_params = ",\n ".join(param_list)
        # group(1) is "### name(" and group(3) is the closing ")".
        return f"{match.group(1)}\n {formatted_params}\n{match.group(3)}"

    # Match function/method signatures: ### name(params)
    # The parameter group consumes one character (or one balanced "(...)"
    # group) per iteration. Nesting "[^()]+" inside the repeated group would
    # allow exponential backtracking whenever the closing paren cannot be
    # matched (unbalanced or doubly nested parentheses).
    content = re.sub(
        r"(###\s+[\w.]+\()((?:[^()]|\([^()]*\))*?)(\))",
        format_signature,
        content,
    )

    # Parameter descriptions: "* **param** – description" -> "- **param**: description"
    content = re.sub(
        r"\*\s+\*\*(\w+)\*\*\s*[–-]\s*",
        r"- **\1**: ",
        content,
    )

    # Section labels: "* **Parameters:**" -> "**Parameters:**" on a new line.
    # A single pass over all labels keeps one label's closing "*" from being
    # mistaken for the bullet of the label that follows it.
    content = re.sub(
        r"\*\s+\*\*(Parameters|Returns|Raises|Yields|Arguments|Throws):\*\*",
        r"\n**\1:**",
        content,
    )

    # Fix nested list items under Parameters/Returns etc.
    # Convert "* * " at the start of a line into a nested "-" item.
    content = re.sub(r"^\s*\*\s+\*\s+", " - ", content, flags=re.MULTILINE)

    # Remove orphaned list markers (lines holding nothing but "*").
    content = re.sub(r"^\s*\*\s*$", "", content, flags=re.MULTILINE)

    # Type annotations in returns: an indented "*type* –" that sits alone on
    # its line becomes the list item "- *type*: " joined with the next line.
    content = re.sub(
        r"\n\s+\*(\w+)\*\s*[–-]\s*\n",
        r"\n - *\1*: ",
        content,
    )

    # Make "#### NOTE" / "#### WARNING" etc. more prominent. The lookbehind
    # and word boundary keep "##### NOTE" and "#### NOTES" from being mangled.
    content = re.sub(
        r"(?<!#)####\s+(NOTE|WARNING|SEE ALSO|IMPORTANT)\b",
        r"> **\1**",
        content,
    )

    return content
379+
380+
381+
def _is_api_page(filename: str, content: str) -> bool:
382+
"""Detect if a markdown file is an API documentation page.
383+
384+
Args:
385+
filename: Name of the markdown file.
386+
content: Content of the file.
387+
388+
Returns:
389+
True if this appears to be API documentation.
390+
"""
391+
# Check filename patterns
392+
api_filename_patterns = [
393+
"api",
394+
"autoapi",
395+
"reference",
396+
]
397+
filename_lower = filename.lower()
398+
if any(pattern in filename_lower for pattern in api_filename_patterns):
399+
return True
400+
401+
# Check content patterns that indicate API docs
402+
api_content_indicators = [
403+
"**Parameters:**",
404+
"* **Parameters:**",
405+
"**Returns:**",
406+
"* **Returns:**",
407+
"**Raises:**",
408+
"* **Raises:**",
409+
"**Arguments:**",
410+
"* **Arguments:**",
411+
]
412+
413+
indicator_count = sum(1 for ind in api_content_indicators if ind in content)
414+
# If multiple API-style sections, treat as API docs
415+
return indicator_count >= 2
416+
417+
213418
def extract_toctree_entries(content: str) -> List[Tuple[str, str]]:
214419
"""Extract toctree entries from markdown content.
215420
@@ -553,6 +758,9 @@ def process_wiki_output(
553758
content = md_file.read_text(encoding="utf-8")
554759
# Clean up markdown formatting issues
555760
content = cleanup_markdown(content)
761+
# Clean up API documentation formatting if this looks like an API page
762+
if _is_api_page(md_file.name, content):
763+
content = cleanup_api_docs(content)
556764
# Fix internal links for wiki
557765
fixed_content = fix_wiki_links(content, page_map)
558766
md_file.write_text(fixed_content, encoding="utf-8")

0 commit comments

Comments
 (0)