Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ repos:
- repo: local
hooks:
- id: validate-notebooks
name: Validate notebook schema
name: Validate notebooks
entry: python scripts/validate_notebooks.py
language: python
files: \.ipynb$
Expand Down
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ See the [python-environment skill](.github/skills/python-environment/SKILL.md) f
- **API documentation**: Auto-generated from docstrings via Sphinx autodoc, no manual API docs needed
- **Build**: Use `make html` to build documentation
- **Doctest**: Use `make doctest` to test that Python examples in doctests work
- **Notebook schema validation**: `prek run --all-files` runs `validate-notebooks` to catch `.ipynb` files that are valid JSON but invalid nbformat.
- **Notebook validation**: `prek run --all-files` runs `validate-notebooks` to catch invalid nbformat and docs notebook convention errors.
- **Notebook validation failure recovery**: Re-open and save (or re-run) in a notebook-aware editor; if it still fails, restore from `main` and reapply intended edits with notebook-aware tooling; rerun `prek run --all-files`; for docs notebook changes run `$CONDA_EXE run -n CausalPy make html` before pushing.
- **Scratch files**: Put temporary notes and generated markdown in `.scratch/` (untracked). Move anything that should be kept into a tracked location.
- **PR drafts**: Create PR summary markdown files in `.scratch/pr_summaries/` (untracked).
Expand Down
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,11 @@ We recommend that your contribution complies with the following guidelines befor

- Documentation follows [NumPy style guide](https://numpydoc.readthedocs.io/en/latest/format.html)

- Notebook files are validated by prek using `nbformat` schema checks (`validate-notebooks`). Run `prek run --all-files` before pushing to catch malformed `.ipynb` files early.
- Notebook files are validated by prek using `nbformat` schema checks plus docs notebook conventions (`validate-notebooks`). Run `prek run --all-files` before pushing to catch malformed `.ipynb` files and docs navigation issues early.

- If you have changed the documentation, you should [build the docs locally](#Building-the-documentation-locally) and check that the changes look correct.

- If notebook schema validation fails (`validate-notebooks`), use this recovery loop: (1) reopen and save or re-run the notebook in a notebook-aware editor, (2) if it still fails, restore the notebook from `main` and reapply only the intended edits with notebook-aware tooling, (3) rerun `prek run --all-files`, and (4) for docs notebook changes run `conda run -n CausalPy make html` before pushing.
- If notebook validation fails (`validate-notebooks`), use this recovery loop: (1) reopen and save or re-run the notebook in a notebook-aware editor for schema errors; for docs convention errors, follow the validator message, (2) if it still fails, restore the notebook from `main` and reapply only the intended edits with notebook-aware tooling, (3) rerun `prek run --all-files`, and (4) for docs notebook changes run `conda run -n CausalPy make html` before pushing.

- Run any of the pre-existing examples in `CausalPy/docs/source/*` that contain analyses that would be affected by your changes to ensure that nothing breaks. This is a useful opportunity to not only check your work for bugs that might not be revealed by unit tests, but also to show how your contribution improves CausalPy for end users.

Expand Down
229 changes: 226 additions & 3 deletions causalpy/tests/test_notebook_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
from pathlib import Path

import nbformat
from nbformat.v4 import new_code_cell, new_notebook, new_output
import pytest
from nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook, new_output

# Third ancestor directory of this module's resolved path, used as the
# repository root when locating the script and docs directory below.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Validator script that the tests in this module launch as a subprocess.
VALIDATOR_SCRIPT = REPO_ROOT / "scripts" / "validate_notebooks.py"
# Directory whose top-level ``*.ipynb`` files are globbed by the regression test.
DOCS_NOTEBOOKS_DIR = REPO_ROOT / "docs" / "source" / "notebooks"


def _run_validator(notebook_path: Path) -> subprocess.CompletedProcess[str]:
Expand All @@ -49,10 +51,22 @@ def _build_notebook() -> dict:
)


def _write_notebook(notebook_path: Path, notebook: dict) -> None:
    """Write ``notebook`` to ``notebook_path`` as UTF-8 via ``nbformat.write``.

    Missing parent directories of ``notebook_path`` are created first.
    """
    target_dir = notebook_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with notebook_path.open("w", encoding="utf-8") as handle:
        nbformat.write(notebook, handle)


def _write_docs_notebook(tmp_path: Path, name: str, cells: list) -> Path:
    """Write a notebook made of ``cells`` to ``tmp_path/docs/source/notebooks/name``.

    Returns
    -------
    Path
        Location of the notebook file that was written.
    """
    destination = tmp_path.joinpath("docs", "source", "notebooks", name)
    _write_notebook(destination, new_notebook(cells=cells))
    return destination


def test_validate_notebooks_accepts_valid_notebook(tmp_path: Path) -> None:
notebook_path = tmp_path / "valid.ipynb"
with notebook_path.open("w", encoding="utf-8") as notebook_file:
nbformat.write(_build_notebook(), notebook_file)
_write_notebook(notebook_path, _build_notebook())

result = _run_validator(notebook_path)

Expand All @@ -77,3 +91,212 @@ def test_validate_notebooks_reports_schema_details_for_invalid_output(
assert "cell[0]" in result.stderr
assert "output[0]" in result.stderr
assert "required property" in result.stderr


def test_docs_notebook_with_single_h1_passes(tmp_path: Path) -> None:
    """A docs notebook with exactly one H1 validates cleanly with empty stderr."""
    title_cell = new_markdown_cell(source="# Lone Title\n\nSome intro text.")
    subsection_cell = new_markdown_cell(source="## Subsection")
    code_cell = new_code_cell(source="x = 1")
    notebook_path = _write_docs_notebook(
        tmp_path,
        "single_h1.ipynb",
        cells=[title_cell, subsection_cell, code_cell],
    )

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr
    assert outcome.stderr == ""


@pytest.mark.parametrize(
    ("description", "cells", "expected_count"),
    [
        # No H1 anywhere in the notebook.
        (
            "no_h1",
            [
                new_markdown_cell(source="## Only a subsection\n\nNo top-level."),
                new_code_cell(source="x = 1"),
            ],
            0,
        ),
        # Two H1 headings inside a single markdown cell.
        (
            "multiple_h1_in_one_cell",
            [
                new_markdown_cell(source="# First\n\nIntro\n\n# Second\n\nMore."),
            ],
            2,
        ),
        # One H1 per cell, spread over three cells.
        (
            "multiple_h1_across_cells",
            [
                new_markdown_cell(source="# First"),
                new_markdown_cell(source="# Second"),
                new_markdown_cell(source="# Third"),
            ],
            3,
        ),
        # A leading-space-indented "#" is still expected to be counted.
        (
            "indented_h1",
            [
                new_markdown_cell(source="# First"),
                new_markdown_cell(source=" # Second"),
            ],
            2,
        ),
    ],
)
def test_docs_notebook_with_wrong_h1_count_fails(
    tmp_path: Path,
    description: str,
    cells: list,
    expected_count: int,
) -> None:
    """Docs notebooks whose H1 count is not exactly one are rejected.

    The validator must exit with status 1 and name the notebook, the
    observed heading count, and the rule on stderr.
    """
    file_name = f"{description}.ipynb"
    notebook_path = _write_docs_notebook(tmp_path, file_name, cells=cells)

    outcome = _run_validator(notebook_path)
    stderr = outcome.stderr

    assert outcome.returncode == 1
    assert str(notebook_path) in stderr
    assert f"found {expected_count}" in stderr
    assert "exactly one top-level (#) markdown heading" in stderr


def test_python_comments_in_code_cells_do_not_count_as_h1(tmp_path: Path) -> None:
    """``#`` comment lines inside code cells are not treated as H1 headings."""
    code_with_comments = "# Calculate average weekly spend\n# Another comment\nx = 1"
    notebook_cells = [
        new_markdown_cell(source="# The Real Title"),
        new_code_cell(source=code_with_comments),
    ]
    notebook_path = _write_docs_notebook(
        tmp_path, "code_comments.ipynb", cells=notebook_cells
    )

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr


def test_fenced_code_block_in_markdown_does_not_count_as_h1(
    tmp_path: Path,
) -> None:
    """Backtick-fenced ``# comment`` lines in markdown are not extra H1s.

    Mirrors the its_lift_test.ipynb pattern: a markdown cell holding a
    fenced python block whose lines start with ``#``.
    """
    fenced_lines = [
        "## Key outputs",
        "",
        "```python",
        "# Calculate average weekly spend during the promo period",
        "# Extract mean lift statistics from the ITS analysis",
        "spend = 100",
        "```",
        "",
        "Some closing text.",
    ]
    # Joining with "\n" yields the same text as the line-by-line literal,
    # with no trailing newline after the closing sentence.
    markdown_with_fenced_python = "\n".join(fenced_lines)
    notebook_cells = [
        new_markdown_cell(source="# Real Title"),
        new_markdown_cell(source=markdown_with_fenced_python),
    ]
    notebook_path = _write_docs_notebook(
        tmp_path, "fenced_python_in_markdown.ipynb", cells=notebook_cells
    )

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr


def test_tilde_fenced_block_in_markdown_does_not_count_as_h1(
    tmp_path: Path,
) -> None:
    """``#`` lines inside a ``~~~`` fenced block are not counted as H1s."""
    markdown_with_tilde_fence = (
        "Example:\n"
        "\n"
        "~~~python\n"
        "# Not a heading\n"
        "y = 2\n"
        "~~~\n"
    )
    notebook_cells = [
        new_markdown_cell(source="# Real Title"),
        new_markdown_cell(source=markdown_with_tilde_fence),
    ]
    notebook_path = _write_docs_notebook(
        tmp_path, "tilde_fenced.ipynb", cells=notebook_cells
    )

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr


def test_longer_fenced_block_can_contain_shorter_fence_and_h1_like_comment(
    tmp_path: Path,
) -> None:
    """A four-backtick fence wrapping a three-backtick fence hides its ``#`` line."""
    markdown_with_nested_fence = (
        "````markdown\n"
        "```python\n"
        "# Not a heading\n"
        "x = 1\n"
        "```\n"
        "````\n"
    )
    notebook_cells = [
        new_markdown_cell(source="# Real Title"),
        new_markdown_cell(source=markdown_with_nested_fence),
    ]
    notebook_path = _write_docs_notebook(
        tmp_path, "nested_fence.ipynb", cells=notebook_cells
    )

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr


def test_four_space_indented_hash_does_not_count_as_h1(tmp_path: Path) -> None:
    """A ``#`` line indented by four spaces is code, not an H1 heading.

    In CommonMark an ATX heading may be indented by at most three spaces;
    four or more leading spaces start an indented code block. The second
    cell therefore must not add to the notebook's H1 count.
    """
    # The indent must be exactly four spaces: with fewer, the line is still
    # a heading and this test would contradict the ``indented_h1`` case.
    indented_code_line = "    # This is an indented code block"
    notebook_path = _write_docs_notebook(
        tmp_path,
        "indented_code.ipynb",
        cells=[
            new_markdown_cell(source="# Real Title"),
            new_markdown_cell(source=indented_code_line),
        ],
    )

    result = _run_validator(notebook_path)

    assert result.returncode == 0, result.stderr


def test_h1_check_only_applies_to_docs_notebooks(tmp_path: Path) -> None:
    """Notebooks outside docs/source/notebooks/ are exempt from the H1 rule."""
    heading_cells = [
        new_markdown_cell(source=heading)
        for heading in ("# First", "# Second", "# Third")
    ]
    # Written directly under tmp_path, i.e. not inside a docs notebooks folder.
    notebook_path = tmp_path / "scratch_notebook.ipynb"
    _write_notebook(notebook_path, new_notebook(cells=heading_cells))

    outcome = _run_validator(notebook_path)

    assert outcome.returncode == 0, outcome.stderr
    assert outcome.stderr == ""


def test_all_docs_notebooks_pass_h1_check() -> None:
    """Regression test: every checked-in docs notebook must satisfy the rule."""
    # The two guards below are defensive only; they are excluded from
    # coverage because a normal checkout has a non-empty docs notebooks
    # directory, so neither branch runs under CI / codecov.
    if not DOCS_NOTEBOOKS_DIR.is_dir():  # pragma: no cover
        pytest.skip("docs/source/notebooks directory not present in this checkout")

    notebook_paths = sorted(DOCS_NOTEBOOKS_DIR.glob("*.ipynb"))
    if not notebook_paths:  # pragma: no cover
        pytest.skip("no notebooks found under docs/source/notebooks")

    # One validator invocation receives every notebook path as an argument.
    command = [sys.executable, str(VALIDATOR_SCRIPT)]
    command.extend(str(path) for path in notebook_paths)
    completed = subprocess.run(
        command,
        check=False,
        capture_output=True,
        text=True,
    )

    assert completed.returncode == 0, completed.stderr
Loading
Loading