|
name: Check Jupyter Notebooks

on:
  pull_request:
    branches:
      - main
    # Run when notebooks, Python dependencies, or this workflow change
    paths:
      - 'domains/**/explore/*.ipynb'
      - 'pyproject.toml'
      - 'uv.lock'
      - 'scripts/activateUvEnvironment.sh'
      - '.github/workflows/internal-check-notebooks.yml'

# Least privilege: this workflow only needs to read the checked-out sources.
permissions:
  contents: read

jobs:
  smoke-test-notebooks:
    runs-on: ubuntu-22.04
    # Static check only; fail fast instead of occupying a runner for the
    # default job timeout if the dependency sync hangs.
    timeout-minutes: 15

    steps:
      - name: Checkout GIT Repository
        uses: actions/checkout@v6

      - name: (uv Setup) Install uv
        uses: astral-sh/setup-uv@v6
        with:
          python-version: '3.12'

      - name: (uv Setup) Sync dependencies from lockfile
        run: uv sync --frozen

      - name: Check notebook syntax and imports
        # For each notebook: parse each Python code cell as Python AST to catch SyntaxErrors,
        # then collect every unique import statement across all notebooks and run them
        # in a single Python process to catch ModuleNotFoundError / ImportError.
        # Cell magics (%%html, %%bash, …) and line magics (%matplotlib, …) are skipped —
        # they are not Python and would cause false-positive SyntaxErrors.
        # No kernel execution — no Neo4j needed, finishes in seconds.
        # --frozen keeps `uv run` on the environment synced above instead of re-locking.
        run: |
          uv run --frozen python3 - <<'PYEOF'
"""Smoke-test Jupyter notebooks without starting a kernel.

Every Python code cell is parsed with ``ast`` to catch SyntaxErrors. The
unique import statements found across all notebooks are then executed in one
child interpreter to catch ModuleNotFoundError / ImportError.
"""
import ast
import json
import subprocess
import sys
from pathlib import Path


def is_ipython_only(line):
    """Return True when *line* is IPython syntax rather than Python.

    Covers line magics (``%matplotlib``) and shell escapes (``!pip install``).
    A line starting with ``!=`` is left alone: it is the continuation of a
    wrapped Python comparison, not a shell escape.
    """
    stripped = line.lstrip()
    if stripped.startswith("%"):
        return True
    return stripped.startswith("!") and not stripped.startswith("!=")


def neutralize_magics(source: str) -> str:
    """Blank out IPython-only lines in *source*.

    The lines are replaced by empty lines instead of being removed so that
    line numbers in a later SyntaxError still match the notebook cell.
    """
    return "\n".join(
        "" if is_ipython_only(line) else line for line in source.split("\n")
    )


def collect_imports(tree: ast.AST) -> set:
    """Return the import statements found anywhere in *tree* as source lines.

    ``import a, b`` is split into one line per module. Skipped on purpose:

    * relative imports (``from . import x``, ``from .pkg import x``), which
      cannot be replayed as absolute imports of the same module;
    * ``from __future__ import ...``, which must be the first statement of a
      file and would turn the sorted import script into a SyntaxError.
    """
    found = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.update(f"import {alias.name}" for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.level or not node.module or node.module == "__future__":
                continue
            names = ", ".join(alias.name for alias in node.names)
            found.add(f"from {node.module} import {names}")
    return found


def check_cell(cell_source) -> tuple:
    """Check the source of one code cell.

    Args:
        cell_source: The cell's ``source`` field, either a list of line
            strings or a single string.

    Returns:
        ``(imports, error)``: the set of import lines found in the cell and
        the ``SyntaxError`` raised while parsing it (``None`` when the cell
        parsed). Empty cells, cell magics (``%%html``, ``%%bash``, ...) and
        cells holding only IPython-only lines yield ``(set(), None)``.
    """
    source = "".join(cell_source).strip()
    # Cell magics hand the whole cell to another interpreter: not Python.
    if not source or source.startswith("%%"):
        return set(), None
    python_source = neutralize_magics(source)
    if not python_source.strip():
        return set(), None
    try:
        tree = ast.parse(python_source)
    except SyntaxError as error:
        return set(), error
    return collect_imports(tree), None


def main() -> int:
    """Check all notebooks under ``domains/**/explore``; return the exit code."""
    notebooks = sorted(Path("domains").glob("**/explore/*.ipynb"))
    import_lines = set()
    syntax_failures = []

    for notebook in notebooks:
        print(f"Parsing {notebook}", flush=True)
        # Read as UTF-8 explicitly instead of relying on the runner's locale.
        nb = json.loads(notebook.read_text(encoding="utf-8"))
        for number, cell in enumerate(nb["cells"], start=1):
            if cell["cell_type"] != "code":
                continue
            cell_imports, error = check_cell(cell["source"])
            if error is not None:
                syntax_failures.append(
                    f"{notebook}: cell {number}: "
                    f"SyntaxError line {error.lineno}: {error.msg}"
                )
                continue
            import_lines |= cell_imports

    if syntax_failures:
        print("Syntax errors found:", file=sys.stderr)
        for failure in syntax_failures:
            print(f"  {failure}", file=sys.stderr)
        return 1

    # Run every import in one child interpreter so a missing module fails the
    # job without importing third-party packages into this process.
    import_script = "\n".join(sorted(import_lines))
    print(f"\nRunning {len(import_lines)} unique import statements from {len(notebooks)} notebooks...", flush=True)
    result = subprocess.run(
        [sys.executable, "-c", import_script],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        print("Import check failed:", file=sys.stderr)
        print(result.stderr, file=sys.stderr)
        return 1

    print(f"All {len(notebooks)} notebooks OK: syntax valid, {len(import_lines)} unique imports resolved.")
    return 0


# `python3 -` executes stdin as `__main__`, so the heredoc still runs main().
if __name__ == "__main__":
    sys.exit(main())
| 94 | + PYEOF |
0 commit comments