Skip to content

Commit 2742453

Browse files
committed
github action to generate notebooks
1 parent cd17e0a commit 2742453

2 files changed

Lines changed: 154 additions & 0 deletions

File tree

.github/workflows/md-to-ipynb.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Regenerates notebooks/*.ipynb from episodes/*.md whenever an episode or the
# converter script changes, then commits the generated notebooks back.
name: Convert Markdown to Notebooks

on:
  push:
    branches: [main]
    paths:
      - 'episodes/*.md'
      - 'notebooks/convert-md-to-ipynb.py'
  pull_request:
    paths:
      - 'episodes/*.md'
      - 'notebooks/convert-md-to-ipynb.py'
  workflow_dispatch: # adds the manual trigger

# The last step pushes a commit, so the job token needs write access to the
# repository contents (the default token may be read-only).
permissions:
  contents: write

jobs:
  convert:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install nbformat

      - name: Convert markdown files to notebooks
        run: |
          mkdir -p notebooks
          # With nullglob an empty episodes/ directory yields zero iterations
          # instead of passing the literal pattern 'episodes/*.md' to the script.
          shopt -s nullglob
          for file in episodes/*.md; do
            filename=$(basename "$file")
            python notebooks/convert-md-to-ipynb.py episodes notebooks "https://raw.githubusercontent.com/${{ github.repository }}/main/images" "$filename"
          done

      - name: Commit and push notebooks
        # Pull-request runs check out a detached merge commit, so there is no
        # branch to push to; on PRs the job only validates that conversion works.
        if: github.event_name != 'pull_request'
        run: |
          git config --global user.name "github-actions"
          git config --global user.email "github-actions@github.com"
          git add notebooks/*.ipynb
          git diff --cached --quiet || git commit -m "Auto-convert .md episodes to .ipynb notebooks"
          git push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

notebooks/convert-md-to-ipynb.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import os
2+
import re
3+
import nbformat as nbf
4+
import argparse
5+
6+
def remove_challenge_solutions(md_content):
    """
    Strip worked solutions out of challenge blocks.

    Removes every span that starts at a 'Solution' heading (optionally nested
    inside '>' blockquote markers) and runs through the first following line
    that consists only of a '{: .challenge}' marker. The challenge text that
    precedes the solution heading is kept. A 'Solution' heading that is never
    followed by such a marker line is left untouched.

    Args:
        md_content: Markdown source as a single string.

    Returns:
        The Markdown source with the matched spans removed.
    """
    # Every line is consumed by exactly one '[^\n]*', so the pattern has a
    # single way to match any given text. This keeps the scan linear even when
    # a 'Solution' heading has no closing marker (lazy, newline-crossing
    # wildcards would backtrack exponentially in that case).
    pattern = r"""
        ^                                     # Start of a line
        (?:(?:\s*>)+\s*)?                     # Optional nested blockquote markers
        \#+\s*Solution[^\n]*                  # The 'Solution' heading line
        (?:\n                                 # Each following line...
            (?!\s*{:\s*\.challenge\s*}\s*$)   # ...that is not the closing marker
            [^\n]*
        )*
        \n\s*{:\s*\.challenge\s*}\s*$         # Closing '{: .challenge}' marker line
    """
    return re.sub(pattern, '', md_content, flags=re.MULTILINE | re.VERBOSE)
22+
23+
24+
def md_to_notebook(md_file, notebook_file, base_image_url):
    """
    Convert one Markdown episode into a Jupyter notebook.

    The conversion:
      * prints a preview of the first 100 source lines,
      * turns the ``title`` entry of the leading YAML front matter into a
        top-level heading cell and drops the front matter,
      * removes challenge solutions via ``remove_challenge_solutions``,
      * points relative image links at ``base_image_url``,
      * turns every fenced block (lines starting with three backticks) into a
        code cell and the text in between into Markdown cells.

    Args:
        md_file: Path of the Markdown file to read.
        notebook_file: Path of the ``.ipynb`` file to write.
        base_image_url: URL prefix under which the images are served; only the
            base name of each image path is appended to it.
    """
    # Explicit encoding so the result does not depend on the runner's locale.
    with open(md_file, 'r', encoding='utf-8') as file:
        md_lines = file.readlines()

    print("Preview of the first 100 lines of the Markdown file:")
    for number, line in enumerate(md_lines[:100], start=1):
        print(f"{number}: {line}", end='')
    print("\n")

    md_content = ''.join(md_lines)

    nb = nbf.v4.new_notebook()
    cells = []

    # YAML front matter is only recognised at the very start of the file.
    # Matching '---' anywhere would also delete text between horizontal rules
    # or table separator rows in the episode body.
    front_matter = re.match(
        r'\s*---[ \t]*\n(.*?)^---[ \t]*$',
        md_content,
        flags=re.DOTALL | re.MULTILINE,
    )
    if front_matter:
        # Title from YAML: any position in the front matter, quotes optional.
        title_match = re.search(
            r'^title:[ \t]*(["\']?)(.*?)\1[ \t]*$',
            front_matter.group(1),
            flags=re.MULTILINE,
        )
        if title_match and title_match.group(2):
            cells.append(nbf.v4.new_markdown_cell(f"# {title_match.group(2)}"))

        # Remove YAML front matter
        md_content = md_content[front_matter.end():]

    # Remove > ## Solution blocks
    md_content = remove_challenge_solutions(md_content)

    # Replace image paths
    def replace_image_path(match):
        alt_text = match.group(1)
        image_path = match.group(2)
        # Absolute URLs already resolve on their own; rewriting them would
        # point at a file that does not exist under base_image_url.
        if re.match(r'https?://', image_path):
            return match.group(0)
        web_image_url = f"{base_image_url}/{os.path.basename(image_path)}"
        return f"![{alt_text}]({web_image_url})"

    md_content = re.sub(r'!\[(.*?)\]\((.*?)\)', replace_image_path, md_content)

    code_buffer = []
    text_buffer = []
    is_in_code_block = False

    def process_buffer(buffer, cell_type="markdown"):
        """Turn the buffered lines into one cell of *cell_type* and empty the buffer."""
        if buffer:
            content = '\n'.join(buffer).strip()
            if cell_type == "code":
                cells.append(nbf.v4.new_code_cell(content))
            else:
                cells.append(nbf.v4.new_markdown_cell(content))
            buffer.clear()

    for line in md_content.split('\n'):
        if line.startswith('```'):
            # A fence line toggles the mode and is itself never emitted.
            is_in_code_block = not is_in_code_block
            if is_in_code_block:
                process_buffer(text_buffer, "markdown")
            else:
                process_buffer(code_buffer, "code")
        elif is_in_code_block:
            code_buffer.append(line)
        else:
            text_buffer.append(line)

    # Flush whatever is pending. An unterminated fence at the end of the file
    # still becomes a code cell instead of being silently dropped.
    if is_in_code_block:
        process_buffer(code_buffer, "code")
    else:
        process_buffer(text_buffer, "markdown")

    # Drop cells that ended up empty after stripping.
    nb['cells'] = [cell for cell in cells if cell['source'].strip()]

    with open(notebook_file, 'w', encoding='utf-8') as file:
        nbf.write(nb, file)
92+
93+
if __name__ == "__main__":
    # Command-line entry point: convert a single Markdown file, named relative
    # to input_dir, into a notebook of the same base name inside output_dir.
    cli = argparse.ArgumentParser(description="Convert Markdown file to Jupyter Notebook")
    for positional in ("input_dir", "output_dir", "base_image_url", "filename"):
        cli.add_argument(positional)
    args = cli.parse_args()

    # Make sure the destination directory exists before anything is written.
    os.makedirs(args.output_dir, exist_ok=True)

    source_md = os.path.join(args.input_dir, args.filename)
    stem, _extension = os.path.splitext(args.filename)
    target_ipynb = os.path.join(args.output_dir, stem + ".ipynb")

    print(f"Converting {source_md} to {target_ipynb}...")
    md_to_notebook(source_md, target_ipynb, args.base_image_url)

0 commit comments

Comments
 (0)