|
name: Check Jupyter Notebooks

# Runs for pull requests whose base branch is main.
on:
  pull_request:
    branches: [main]
    # Path filter: the workflow only starts when a notebook in an explore
    # directory, the Python dependency files, the listed helper script, or
    # this workflow file itself is part of the change.
    paths:
      - "domains/**/explore/*.ipynb"
      - "pyproject.toml"
      - "uv.lock"
      - "scripts/activateUvEnvironment.sh"
      - ".github/workflows/internal-check-notebooks.yml"
| 14 | + |
jobs:
  # Finds every .ipynb file below "domains" whose path contains an "explore"
  # directory and publishes the sorted paths as a JSON array in the
  # "notebooks" job output, so the next job can fan out over them.
  discover-notebooks:
    runs-on: ubuntu-22.04
    outputs:
      notebooks: ${{ steps.list.outputs.notebooks }}

    steps:
      - name: Checkout GIT Repository
        uses: actions/checkout@v6

      - name: List all explore notebooks
        id: list
        run: |
          notebooks=$(find domains -path '*/explore/*.ipynb' | sort | python3 -c "
          import json, sys
          paths = [p.strip() for p in sys.stdin if p.strip()]
          print(json.dumps(paths))
          ")
          echo "notebooks=$notebooks" >> "$GITHUB_OUTPUT"

  # Executes each discovered notebook in its own matrix job and fails only on
  # import or syntax errors.
  smoke-test-notebook:
    needs: discover-notebooks
    # An empty matrix is a workflow error. The job-level condition is evaluated
    # before the matrix is expanded, so skip the job when nothing was found.
    if: ${{ needs.discover-notebooks.outputs.notebooks != '[]' }}
    runs-on: ubuntu-22.04
    strategy:
      # Keep testing the remaining notebooks when one of them fails.
      fail-fast: false
      matrix:
        notebook: ${{ fromJson(needs.discover-notebooks.outputs.notebooks) }}

    steps:
      - name: Checkout GIT Repository
        uses: actions/checkout@v6

      - name: (uv Setup) Install uv
        uses: astral-sh/setup-uv@v6
        with:
          python-version: '3.12'

      - name: (uv Setup) Sync dependencies from lockfile
        run: uv sync --frozen

      - name: Execute notebook (allow cell errors)
        # NEO4J_INITIAL_PASSWORD is required by notebooks but Neo4j is not available in CI.
        # --allow-errors lets all cells run even if Neo4j connection or query cells fail.
        env:
          NEO4J_INITIAL_PASSWORD: smoke-test-no-neo4j
          # The notebook path originates from the pull request contents. Passing
          # it through the environment instead of expanding the expression inside
          # the script keeps special characters in file names from being
          # interpreted by the shell.
          NOTEBOOK_PATH: ${{ matrix.notebook }}
        run: |
          uv run --with nbconvert python -m nbconvert \
            --to notebook \
            --execute \
            --allow-errors \
            --ExecutePreprocessor.timeout=300 \
            --output executed \
            --output-dir . \
            "$NOTEBOOK_PATH"

      - name: Check for import and syntax errors in executed notebook
        # Only fatal errors that prevent the notebook from running at all are checked:
        # ModuleNotFoundError, ImportError, SyntaxError.
        # All Neo4j connection errors and cascading errors from missing data are expected and ignored.
        run: |
          uv run python3 - <<'PYEOF'
          import json
          import sys

          fatal_error_types = {"ModuleNotFoundError", "ImportError", "SyntaxError"}

          # "executed.ipynb" is the output written by the previous step.
          with open("executed.ipynb", encoding="utf-8") as notebook_file:
              nb = json.load(notebook_file)

          failures = []
          for cell_idx, cell in enumerate(nb["cells"], 1):
              for output in cell.get("outputs", []):
                  if output.get("output_type") != "error":
                      continue
                  ename = output.get("ename", "")
                  evalue = output.get("evalue", "")
                  if ename in fatal_error_types:
                      failures.append(f"Cell {cell_idx}: {ename}: {evalue}")

          if failures:
              print("Fatal errors found in notebook:", file=sys.stderr)
              for failure in failures:
                  print(f"  {failure}", file=sys.stderr)
              sys.exit(1)

          print(f"OK: {len(nb['cells'])} cells executed, no import/syntax errors.")
          PYEOF
0 commit comments