GitHub Action to generate notebooks

qualiaMachine · qualiaMachine · commit 27424534ee6b · 2025-05-01T16:04:13.000-05:00
diff --git a/.github/workflows/md-to-ipynb.yml b/.github/workflows/md-to-ipynb.yml
@@ -0,0 +1,47 @@
+name: Convert Markdown to Notebooks
+
+# Regenerates notebooks/*.ipynb from episodes/*.md whenever an episode or the
+# converter script changes, and commits the result back to the branch.
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'episodes/*.md'
+      - 'notebooks/convert-md-to-ipynb.py'
+  pull_request:
+    paths:
+      - 'episodes/*.md'
+      - 'notebooks/convert-md-to-ipynb.py'
+  workflow_dispatch:  # adds the manual trigger
+
+# The last step pushes a commit, so the workflow token needs write access to
+# repository contents regardless of the repository's default token setting.
+permissions:
+  contents: write
+
+jobs:
+  convert:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: pip install nbformat
+
+      - name: Convert markdown files to notebooks
+        env:
+          # Passed through the environment rather than interpolated into the
+          # script text, so the shell only ever sees it as a quoted value.
+          IMAGE_BASE_URL: https://raw.githubusercontent.com/${{ github.repository }}/main/images
+        run: |
+          mkdir -p notebooks
+          for file in episodes/*.md; do
+            filename=$(basename "$file")
+            python notebooks/convert-md-to-ipynb.py episodes notebooks "$IMAGE_BASE_URL" "$filename"
+          done
+
+      - name: Commit and push notebooks
+        # Pull-request runs check out a detached merge commit, so there is no
+        # branch to push to; on PRs the job only verifies the conversion runs.
+        if: github.event_name != 'pull_request'
+        run: |
+          git config --global user.name "github-actions"
+          git config --global user.email "github-actions@github.com"
+          git add notebooks/*.ipynb
+          # Only commit and push when the conversion actually changed a notebook.
+          if git diff --cached --quiet; then
+            echo "No notebook changes to commit."
+          else
+            git commit -m "Auto-convert .md episodes to .ipynb notebooks"
+            git push
+          fi
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/notebooks/convert-md-to-ipynb.py b/notebooks/convert-md-to-ipynb.py
@@ -0,0 +1,107 @@
+import os
+import re
+import nbformat as nbf
+import argparse
+
+def remove_challenge_solutions(md_content):
+    """
+    Strip solution text out of challenge blocks.
+
+    Each match starts at a line holding a 'Solution' heading (optionally
+    prefixed by blockquote '>' markers) and runs through the next
+    '{:.challenge}' marker line; that whole span is deleted. Text before the
+    heading is kept, and input without such a span is returned unchanged.
+    """
+    solution_span = r"""
+        ^                       # Start of line
+        (?:\s*>+\s*)*           # Any number of nested blockquote markers
+        \#+\s*Solution.*?       # Match '## Solution' heading with optional text
+        (?:\n                   # Match newline followed by...
+            (?!\s*{:\s*\.challenge\s*})  # ...but not the final '{:.challenge}' line
+            .*?                 # ...any characters
+        )*?                     # ...repeat lazily
+        ^\s*{:\s*\.challenge\s*}\s*$  # Final marker line
+    """
+    # VERBOSE lets the pattern above carry its own whitespace and comments.
+    regex_flags = re.MULTILINE | re.DOTALL | re.VERBOSE
+    matcher = re.compile(solution_span, regex_flags)
+    return matcher.sub('', md_content)
+
+
+def md_to_notebook(md_file, notebook_file, base_image_url):
+    """
+    Convert one Markdown file into a Jupyter notebook.
+
+    The YAML front matter at the top of the file (if any) supplies an optional
+    title cell and is then dropped, challenge solutions are removed, image
+    links are re-pointed at ``base_image_url``, and the remaining text is split
+    into markdown and code cells at fence lines that start with three
+    backticks.
+
+    Args:
+        md_file: Path of the Markdown file to read (decoded as UTF-8).
+        notebook_file: Path of the notebook file to write (encoded as UTF-8).
+        base_image_url: URL prefix for images; every image link becomes
+            ``<base_image_url>/<file name of the original image path>``.
+    """
+    with open(md_file, 'r', encoding='utf-8') as file:
+        md_lines = file.readlines()
+
+    print("Preview of the first 100 lines of the Markdown file:")
+    for i, line in enumerate(md_lines[:100]):
+        print(f"{i+1}: {line}", end='')
+    print("\n")
+
+    md_content = ''.join(md_lines)
+
+    nb = nbf.v4.new_notebook()
+    cells = []
+
+    # Front matter only counts when it opens the document. re.match anchors at
+    # the start of the string, so later '---' lines (horizontal rules, table
+    # separators) are never mistaken for front matter and their content stays.
+    front_matter = re.match(
+        r'\s*---[ \t]*\n(.*?)^---[ \t]*$',
+        md_content,
+        flags=re.DOTALL | re.MULTILINE,
+    )
+    if front_matter:
+        # Title from YAML: any position in the front matter, quotes optional.
+        title_match = re.search(
+            r'^title:[ \t]*(["\']?)(.*?)\1[ \t]*$',
+            front_matter.group(1),
+            flags=re.MULTILINE,
+        )
+        if title_match and title_match.group(2):
+            title = title_match.group(2)
+            cells.append(nbf.v4.new_markdown_cell(f"# {title}"))
+
+        # Remove YAML front matter (just this leading block).
+        md_content = md_content[front_matter.end():]
+
+    # Remove > ## Solution blocks
+    md_content = remove_challenge_solutions(md_content)
+
+    # Replace image paths
+    def replace_image_path(match):
+        alt_text = match.group(1)
+        image_path = match.group(2)
+        # Only the file name is kept; any directory part of the path is dropped.
+        web_image_url = f"{base_image_url}/{os.path.basename(image_path)}"
+        return f"![{alt_text}]({web_image_url})"
+
+    md_content = re.sub(r'!\[(.*?)\]\((.*?)\)', replace_image_path, md_content)
+
+    code_buffer = []
+    text_buffer = []
+    is_in_code_block = False
+
+    def process_buffer(buffer, cell_type="markdown"):
+        """Turn the buffered lines into one cell of ``cell_type`` and empty the buffer."""
+        if buffer:
+            content = '\n'.join(buffer).strip()
+            if cell_type == "code":
+                cells.append(nbf.v4.new_code_cell(content))
+            else:
+                cells.append(nbf.v4.new_markdown_cell(content))
+            buffer.clear()
+
+    # NOTE(review): every fenced block becomes a code cell, whatever language
+    # tag follows the backticks -- confirm that is intended for non-code fences.
+    for line in md_content.split('\n'):
+        if line.startswith('```'):
+            # A fence line toggles the mode and closes whichever buffer was open.
+            is_in_code_block = not is_in_code_block
+            if is_in_code_block:
+                process_buffer(text_buffer, "markdown")
+            else:
+                process_buffer(code_buffer, "code")
+        elif is_in_code_block:
+            code_buffer.append(line)
+        else:
+            text_buffer.append(line)
+
+    # Flush whatever is left. The code buffer is only non-empty here when the
+    # file ends inside an unterminated fence; emit it rather than lose it.
+    process_buffer(text_buffer, "markdown")
+    process_buffer(code_buffer, "code")
+
+    # Drop cells that ended up with no visible content.
+    cells = [cell for cell in cells if cell['source'].strip()]
+    nb['cells'] = cells
+
+    with open(notebook_file, 'w', encoding='utf-8') as file:
+        nbf.write(nb, file)
+
+if __name__ == "__main__":
+    # Command line: <input_dir> <output_dir> <base_image_url> <filename>
+    cli = argparse.ArgumentParser(description="Convert Markdown file to Jupyter Notebook")
+    for positional in ("input_dir", "output_dir", "base_image_url", "filename"):
+        cli.add_argument(positional)
+
+    opts = cli.parse_args()
+
+    # The notebook keeps the Markdown file's base name, with an .ipynb extension.
+    stem, _extension = os.path.splitext(opts.filename)
+    source_md = os.path.join(opts.input_dir, opts.filename)
+    os.makedirs(opts.output_dir, exist_ok=True)
+    target_ipynb = os.path.join(opts.output_dir, stem + ".ipynb")
+
+    print(f"Converting {source_md} to {target_ipynb}...")
+    md_to_notebook(source_md, target_ipynb, opts.base_image_url)