pymc-labs · drbenvincent · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -96,7 +96,7 @@ repos:
   - repo: local
     hooks:
       - id: validate-notebooks
-        name: Validate notebook schema
+        name: Validate notebooks
         entry: python scripts/validate_notebooks.py
         language: python
         files: \.ipynb$

diff --git a/AGENTS.md b/AGENTS.md
@@ -48,7 +48,7 @@ See the [python-environment skill](.github/skills/python-environment/SKILL.md) f
 - **API documentation**: Auto-generated from docstrings via Sphinx autodoc, no manual API docs needed
 - **Build**: Use `make html` to build documentation
 - **Doctest**: Use `make doctest` to test that Python examples in doctests work
-- **Notebook schema validation**: `prek run --all-files` runs `validate-notebooks` to catch `.ipynb` files that are valid JSON but invalid nbformat.
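+- **Notebook validation**: `prek run --all-files` runs `validate-notebooks` to catch `.ipynb` files that fail nbformat schema validation or break docs notebook conventions (e.g. a notebook under `docs/source/notebooks/` without exactly one top-level `#` markdown heading).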
 - **Notebook validation failure recovery**: Re-open and save (or re-run) in a notebook-aware editor; if it still fails, restore from `main` and reapply intended edits with notebook-aware tooling; rerun `prek run --all-files`; for docs notebook changes run `$CONDA_EXE run -n CausalPy make html` before pushing.
 - **Scratch files**: Put temporary notes and generated markdown in `.scratch/` (untracked). Move anything that should be kept into a tracked location.
   - **PR drafts**: Create PR summary markdown files in `.scratch/pr_summaries/` (untracked).

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -213,11 +213,11 @@ We recommend that your contribution complies with the following guidelines befor
 
 - Documentation follows [NumPy style guide](https://numpydoc.readthedocs.io/en/latest/format.html)
 
-- Notebook files are validated by prek using `nbformat` schema checks (`validate-notebooks`). Run `prek run --all-files` before pushing to catch malformed `.ipynb` files early.
+- Notebook files are validated by prek (`validate-notebooks`) using `nbformat` schema checks plus docs notebook conventions, such as requiring exactly one top-level (`#`) markdown heading in each notebook under `docs/source/notebooks/`. Run `prek run --all-files` before pushing to catch malformed `.ipynb` files and docs navigation issues early.
 
 - If you have changed the documentation, you should [build the docs locally](#Building-the-documentation-locally) and check that the changes look correct.
 
-- If notebook schema validation fails (`validate-notebooks`), use this recovery loop: (1) reopen and save or re-run the notebook in a notebook-aware editor, (2) if it still fails, restore the notebook from `main` and reapply only the intended edits with notebook-aware tooling, (3) rerun `prek run --all-files`, and (4) for docs notebook changes run `conda run -n CausalPy make html` before pushing.
+- If notebook validation fails (`validate-notebooks`), use this recovery loop: (1) for schema errors, reopen and save (or re-run) the notebook in a notebook-aware editor; for docs convention errors, fix the issue named in the validator message, (2) if it still fails, restore the notebook from `main` and reapply only the intended edits with notebook-aware tooling, (3) rerun `prek run --all-files`, and (4) for docs notebook changes run `conda run -n CausalPy make html` before pushing.
 
 - Run any of the pre-existing examples in `CausalPy/docs/source/*` that contain analyses that would be affected by your changes to ensure that nothing breaks. This is a useful opportunity to not only check your work for bugs that might not be revealed by unit test, but also to show how your contribution improves CausalPy for end users.
 

diff --git a/causalpy/tests/test_notebook_validation.py b/causalpy/tests/test_notebook_validation.py
@@ -21,10 +21,12 @@
 from pathlib import Path
 
 import nbformat
-from nbformat.v4 import new_code_cell, new_notebook, new_output
+import pytest
+from nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook, new_output
 
 REPO_ROOT = Path(__file__).resolve().parents[2]
 VALIDATOR_SCRIPT = REPO_ROOT / "scripts" / "validate_notebooks.py"
+DOCS_NOTEBOOKS_DIR = REPO_ROOT / "docs" / "source" / "notebooks"
 
 
 def _run_validator(notebook_path: Path) -> subprocess.CompletedProcess[str]:
@@ -49,10 +51,22 @@ def _build_notebook() -> dict:
     )
 
 
+def _write_notebook(notebook_path: Path, notebook: dict) -> None:
+    notebook_path.parent.mkdir(parents=True, exist_ok=True)
+    with notebook_path.open("w", encoding="utf-8") as notebook_file:
+        nbformat.write(notebook, notebook_file)
+
+
+def _write_docs_notebook(tmp_path: Path, name: str, cells: list) -> Path:
+    notebook = new_notebook(cells=cells)
+    notebook_path = tmp_path / "docs" / "source" / "notebooks" / name
+    _write_notebook(notebook_path, notebook)
+    return notebook_path
+
+
 def test_validate_notebooks_accepts_valid_notebook(tmp_path: Path) -> None:
     notebook_path = tmp_path / "valid.ipynb"
-    with notebook_path.open("w", encoding="utf-8") as notebook_file:
-        nbformat.write(_build_notebook(), notebook_file)
+    _write_notebook(notebook_path, _build_notebook())
 
     result = _run_validator(notebook_path)
 
@@ -77,3 +91,212 @@ def test_validate_notebooks_reports_schema_details_for_invalid_output(
     assert "cell[0]" in result.stderr
     assert "output[0]" in result.stderr
     assert "required property" in result.stderr
+
+
+def test_docs_notebook_with_single_h1_passes(tmp_path: Path) -> None:
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "single_h1.ipynb",
+        cells=[
+            new_markdown_cell(source="# Lone Title\n\nSome intro text."),
+            new_markdown_cell(source="## Subsection"),
+            new_code_cell(source="x = 1"),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+    assert result.stderr == ""
+
+
+@pytest.mark.parametrize(
+    ("description", "cells", "expected_count"),
+    [
+        (
+            "no_h1",
+            [
+                new_markdown_cell(source="## Only a subsection\n\nNo top-level."),
+                new_code_cell(source="x = 1"),
+            ],
+            0,
+        ),
+        (
+            "multiple_h1_in_one_cell",
+            [
+                new_markdown_cell(source="# First\n\nIntro\n\n# Second\n\nMore."),
+            ],
+            2,
+        ),
+        (
+            "multiple_h1_across_cells",
+            [
+                new_markdown_cell(source="# First"),
+                new_markdown_cell(source="# Second"),
+                new_markdown_cell(source="# Third"),
+            ],
+            3,
+        ),
+        (
+            "indented_h1",
+            [
+                new_markdown_cell(source="# First"),
+                new_markdown_cell(source="   # Second"),
+            ],
+            2,
+        ),
+    ],
+)
+def test_docs_notebook_with_wrong_h1_count_fails(
+    tmp_path: Path,
+    description: str,
+    cells: list,
+    expected_count: int,
+) -> None:
+    notebook_path = _write_docs_notebook(tmp_path, f"{description}.ipynb", cells=cells)
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 1
+    assert str(notebook_path) in result.stderr
+    assert f"found {expected_count}" in result.stderr
+    assert "exactly one top-level (#) markdown heading" in result.stderr
+
+
+def test_python_comments_in_code_cells_do_not_count_as_h1(tmp_path: Path) -> None:
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "code_comments.ipynb",
+        cells=[
+            new_markdown_cell(source="# The Real Title"),
+            new_code_cell(
+                source=("# Calculate average weekly spend\n# Another comment\nx = 1")
+            ),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+
+
+def test_fenced_code_block_in_markdown_does_not_count_as_h1(
+    tmp_path: Path,
+) -> None:
+    """Replicates the its_lift_test.ipynb pattern: ```python``` block with
+    `# comment` inside a markdown cell must not register as additional H1s."""
+    markdown_with_fenced_python = (
+        "## Key outputs\n"
+        "\n"
+        "```python\n"
+        "# Calculate average weekly spend during the promo period\n"
+        "# Extract mean lift statistics from the ITS analysis\n"
+        "spend = 100\n"
+        "```\n"
+        "\n"
+        "Some closing text."
+    )
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "fenced_python_in_markdown.ipynb",
+        cells=[
+            new_markdown_cell(source="# Real Title"),
+            new_markdown_cell(source=markdown_with_fenced_python),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+
+
+def test_tilde_fenced_block_in_markdown_does_not_count_as_h1(
+    tmp_path: Path,
+) -> None:
+    markdown_with_tilde_fence = "Example:\n\n~~~python\n# Not a heading\ny = 2\n~~~\n"
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "tilde_fenced.ipynb",
+        cells=[
+            new_markdown_cell(source="# Real Title"),
+            new_markdown_cell(source=markdown_with_tilde_fence),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+
+
+def test_longer_fenced_block_can_contain_shorter_fence_and_h1_like_comment(
+    tmp_path: Path,
+) -> None:
+    markdown_with_nested_fence = (
+        "````markdown\n```python\n# Not a heading\nx = 1\n```\n````\n"
+    )
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "nested_fence.ipynb",
+        cells=[
+            new_markdown_cell(source="# Real Title"),
+            new_markdown_cell(source=markdown_with_nested_fence),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+
+
+def test_four_space_indented_hash_does_not_count_as_h1(tmp_path: Path) -> None:
+    notebook_path = _write_docs_notebook(
+        tmp_path,
+        "indented_code.ipynb",
+        cells=[
+            new_markdown_cell(source="# Real Title"),
+            new_markdown_cell(source="    # This is an indented code block"),
+        ],
+    )
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+
+
+def test_h1_check_only_applies_to_docs_notebooks(tmp_path: Path) -> None:
+    """Notebooks outside docs/source/notebooks/ are exempt from the H1 rule."""
+    notebook = new_notebook(
+        cells=[
+            new_markdown_cell(source="# First"),
+            new_markdown_cell(source="# Second"),
+            new_markdown_cell(source="# Third"),
+        ]
+    )
+    notebook_path = tmp_path / "scratch_notebook.ipynb"
+    _write_notebook(notebook_path, notebook)
+
+    result = _run_validator(notebook_path)
+
+    assert result.returncode == 0, result.stderr
+    assert result.stderr == ""
+
+
+def test_all_docs_notebooks_pass_h1_check() -> None:
+    """Regression test: every checked-in docs notebook must satisfy the rule."""
+    # pragma directives below exclude defensive guards from coverage: in a
+    # normal checkout the docs notebooks directory exists and is non-empty, so
+    # these branches never execute under CI / codecov.
+    if not DOCS_NOTEBOOKS_DIR.is_dir():  # pragma: no cover
+        pytest.skip("docs/source/notebooks directory not present in this checkout")
+
+    notebooks = sorted(DOCS_NOTEBOOKS_DIR.glob("*.ipynb"))
+    if not notebooks:  # pragma: no cover
+        pytest.skip("no notebooks found under docs/source/notebooks")
+
+    result = subprocess.run(
+        [sys.executable, str(VALIDATOR_SCRIPT), *map(str, notebooks)],
+        check=False,
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, result.stderr