|
# GitHub repository ("owner/name") and branch this script refers to.
# NOTE(review): presumably used to build the notebook badge links -- confirm
# against the code that consumes these constants.
GITHUB_REPO = "PavanMudigonda/python-bro-code"
GITHUB_BRANCH = "main"
8 | 8 |
|
# YAML front matter that add_mystnb_frontmatter() prepends to a generated doc
# when the doc contains at least one {code-cell} block. The literal ends with
# a blank line so the front matter is separated from the document body.
MYSTNB_FRONTMATTER = """\
---
jupytext:
  text_representation:
    format_name: myst
kernelspec:
  display_name: Python 3
  language: python
  name: python3
---

"""
| 21 | + |
# Markers that indicate a code block is pseudocode / not runnable.
# _should_convert() tests each marker as a plain, case-sensitive substring of
# the whole block text, so a marker matches anywhere in the block (including
# inside string literals), not only at the start of a line.
SKIP_MARKERS = [
    "# ...",
    "# calculator code here",
    "# Create calculator",
    "# Parse and calculate",
    "# Allow user",
    "# your code here",
    "# TODO",
    "pass #",
]
| 33 | + |
9 | 34 | # Pattern to match old badge lines that LINK TO Colab/Kaggle (preserves YouTube badges) |
10 | 35 | OLD_BADGE_RE = re.compile( |
11 | 36 | r'^\s*\[!\[.*?\]\(.*?\)\]\(https?://(?:colab\.research\.google\.com|kaggle\.com/kernels).*?\)\s*' |
@@ -72,6 +97,110 @@ def extract_markdown_from_notebook(nb_path): |
72 | 97 | return '\n\n'.join(parts) |
73 | 98 |
|
74 | 99 |
|
| 100 | +# ── Post-processing helpers applied to every generated doc ────────────── |
| 101 | + |
| 102 | + |
def remove_duplicate_video_tutorial(content):
    """Remove the second '## 📺 Video Tutorial' block if duplicated.

    A "block" is the heading line plus the lines that follow it up to and
    including the first line containing 'Watch on YouTube'. If no such line
    appears within the first four lines of the block, exactly four lines
    (or fewer, at end of document) are removed.

    Only headings that start at the beginning of a line are considered, so
    deeper headings such as '### 📺 Video Tutorial ...' or inline mentions
    of the heading text are never mistaken for a duplicate.

    Args:
        content: Full markdown text of the generated doc.

    Returns:
        The text with the second heading block removed, or ``content``
        unchanged when the heading occurs fewer than two times.
    """
    max_block_lines = 4
    # Anchor to line starts: a plain substring search would also hit
    # "### 📺 Video Tutorial" (which contains the "## ..." text) and prose.
    heading_re = re.compile(r"^## 📺 Video Tutorial", re.MULTILINE)

    headings = heading_re.finditer(content)
    if next(headings, None) is None:
        return content
    duplicate = next(headings, None)
    if duplicate is None:
        return content

    start = duplicate.start()
    # Only the first few lines after the heading matter; maxsplit avoids
    # splitting the entire remainder of the document.
    window = content[start:].split("\n", max_block_lines)[:max_block_lines]
    remove_count = len(window)
    for position, line in enumerate(window, 1):
        if "Watch on YouTube" in line:
            remove_count = position
            break
    end = start + len("\n".join(window[:remove_count]))

    # Also consume the newline that precedes the heading. Because the match
    # is line-anchored, any heading not at offset 0 is preceded by '\n'.
    if start > 0:
        start -= 1
    return content[:start] + content[end:]
| 126 | + |
| 127 | + |
def _should_convert(code_lines):
    """Decide whether a python fenced block should become an executable cell.

    Returns False when the block is empty or whitespace-only, when its text
    contains any entry of SKIP_MARKERS, or when every non-blank line is a
    comment. Returns True otherwise.
    """
    source = "\n".join(code_lines)
    meaningful = [text.strip() for text in code_lines if text.strip()]
    if not meaningful:
        return False
    if any(marker in source for marker in SKIP_MARKERS):
        return False
    # Runnable only if at least one non-blank line is not a comment.
    return not all(text.startswith("#") for text in meaningful)
| 140 | + |
| 141 | + |
| 142 | +def _has_box_drawing(code_lines): |
| 143 | + """Return True if the block contains box-drawing characters.""" |
| 144 | + return any(ch in line for line in code_lines for ch in "│└├─") |
| 145 | + |
| 146 | + |
def convert_python_to_code_cells(content):
    """Rewrite the opening fence of each ```python block as appropriate.

    For every block opened by a line matching ```python (at column 0):

    * blocks containing box-drawing characters are re-fenced as ```text;
    * blocks judged runnable by _should_convert become
      ```{code-cell} python, with ``:tags: [skip-execution]`` and a blank
      line added when any line contains ``input(``;
    * all other blocks keep a plain ```python fence.

    The block body and the original closing fence line are kept as-is; if
    the document ends before a closing fence, a ``` line is appended.
    """
    open_fence = re.compile(r"^```python\s*$")
    close_fence = re.compile(r"^```\s*$")

    output = []
    remaining = iter(content.split("\n"))
    for line in remaining:
        if not open_fence.match(line):
            output.append(line)
            continue

        # Pull the block body from the same iterator, up to the closing fence.
        body = []
        fence_end = "```"  # used when the block is never closed
        for inner in remaining:
            if close_fence.match(inner):
                fence_end = inner
                break
            body.append(inner)

        if _has_box_drawing(body):
            header = ["```text"]
        elif not _should_convert(body):
            header = ["```python"]
        else:
            header = ["```{code-cell} python"]
            if any("input(" in src for src in body):
                header += [":tags: [skip-execution]", ""]

        output.extend(header)
        output.extend(body)
        output.append(fence_end)
    return "\n".join(output)
| 185 | + |
| 186 | + |
def add_mystnb_frontmatter(content):
    """Return content with MYSTNB_FRONTMATTER prepended when it is needed.

    The front matter is added only if the doc contains a ``{code-cell}``
    block and does not already begin with ``---``; otherwise the content is
    returned untouched.
    """
    needs_header = "{code-cell}" in content and not content.startswith("---")
    return MYSTNB_FRONTMATTER + content if needs_header else content
| 194 | + |
| 195 | + |
def postprocess_doc(content):
    """Run every post-processing step over a generated doc, in order.

    Steps: drop a duplicated video-tutorial block, convert python fences to
    code cells, then prepend the myst-nb front matter if required.
    """
    pipeline = (
        remove_duplicate_video_tutorial,
        convert_python_to_code_cells,
        add_mystnb_frontmatter,
    )
    for step in pipeline:
        content = step(content)
    return content
| 202 | + |
| 203 | + |
75 | 204 | def main(): |
76 | 205 | base_dir = "." |
77 | 206 | docs_dir = "docs" |
@@ -137,6 +266,9 @@ def main(): |
137 | 266 | else: |
138 | 267 | content += "\n" + notebook_badges |
139 | 268 |
|
| 269 | + # Post-process: deduplicate, convert code cells, add frontmatter |
| 270 | + content = postprocess_doc(content) |
| 271 | + |
140 | 272 | with open(doc_path, 'w', encoding='utf-8') as wf: |
141 | 273 | wf.write(content) |
142 | 274 |
|
|
0 commit comments