Skip to content

Commit 44ece7c

Browse files
authored
Wire generated reference docs into checks
1 parent a1e09ef commit 44ece7c

7 files changed

Lines changed: 311 additions & 18 deletions

File tree

.github/workflows/pr_docs_changes.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,15 @@ jobs:
1818
steps:
1919
- name: Checkout repo
2020
uses: actions/checkout@v4
21+
- name: Install uv
22+
uses: astral-sh/setup-uv@v5
23+
- name: Set up Python
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: '3.13'
2127
- name: Set up Quarto
2228
uses: quarto-dev/quarto-actions/setup@v2
29+
- name: Test reference generator smoke build
30+
run: make docs-reference-smoke
2331
- name: Test documentation builds
24-
run: quarto render docs
32+
run: make docs

Makefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
1-
.PHONY: docs docs-serve
1+
.PHONY: docs docs-serve docs-generate-reference docs-reference-smoke
22

33
all: build-package
44

55
docs:
66
quarto render docs
77

8+
docs-generate-reference:
9+
uv run --extra us python docs/_generator/build_reference.py --country us --out docs/_generated/reference/us
10+
11+
docs-reference-smoke:
12+
rm -rf /tmp/policyengine-reference-smoke
13+
uv run --extra us python docs/_generator/build_reference.py --country us --filter chip --out /tmp/policyengine-reference-smoke/us
14+
quarto render /tmp/policyengine-reference-smoke/us/index.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/root
15+
quarto render /tmp/policyengine-reference-smoke/us/programs.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program-index
16+
quarto render /tmp/policyengine-reference-smoke/us/programs/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/program
17+
quarto render /tmp/policyengine-reference-smoke/us/gov/hhs/chip/chip.qmd --output-dir /tmp/policyengine-reference-smoke/rendered/variable
18+
819
docs-serve:
920
quarto preview docs
1021

docs/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@ _site
33
_freeze
44
/.quarto/
55
**/*.quarto_ipynb
6+
7+
# Generated reference output can be rebuilt from installed country models.
8+
_generated/

docs/_generator/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Reference generator prototype
22

3-
Auto-generates one Quarto page per variable in a country model, plus a program-coverage page, purely from metadata on the `Variable` classes and `programs.yaml`.
3+
Auto-generates one Quarto page per variable in a country model, plus a program coverage index and one page per program, purely from metadata on the `Variable` classes and `programs.yaml`.
44

55
## Run
66

@@ -29,7 +29,7 @@ Per variable:
2929
- Statutory references (from `reference = ...`)
3030
- Source file path and line number
3131

32-
Per program: a row in the generated program-coverage page pulled from `programs.yaml` (id, name, category, agency, status, coverage).
32+
Per program: a row in the generated program coverage index pulled from `programs.yaml` (name, category, agency, status, coverage, root variable), plus a generated program page with metadata, notes, and links to implementation variables.
3333

3434
Per directory (`gov/hhs/chip/`, `gov/usda/snap/`, etc.): a listing page using Quarto's built-in directory listing so the nav auto-organizes.
3535

@@ -49,4 +49,4 @@ Extensions worth considering:
4949
1. Walk `parameters/` YAML tree and emit a page per parameter with its time series, breakdowns, and references.
5050
2. For each variable with a formula, surface the dependency graph (other variables / parameters it reads). `policyengine_core`'s `Variable.exhaustive_parameter_dependencies` gets partway there.
5151
3. For each calibration target (in `policyengine-us-data/storage/calibration_targets/*.csv`), emit a page describing source, aggregation level, freshness.
52-
4. Cross-link variables to the programs they contribute to via `programs.yaml`'s `variable:` field.
52+
4. Add reverse links from variable pages back to the programs that use them.

docs/_generator/build_reference.py

Lines changed: 234 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import argparse
4848
import importlib
4949
import logging
50+
import os
5051
import re
5152
import textwrap
5253
from dataclasses import dataclass
@@ -85,6 +86,10 @@ class VariableRecord:
8586
tree_path: tuple[str, ...]
8687

8788

89+
def _variable_page_path(record: VariableRecord, out_root: Path) -> Path:
    """Return the ``.qmd`` path where a variable's reference page is written.

    The page is placed under ``out_root`` in the directory spelled out by
    ``record.tree_path`` and is named after the slugified variable name.

    Args:
        record: Variable whose page location is wanted.
        out_root: Root directory of the generated reference tree.

    Returns:
        ``out_root / <tree_path...> / <slug(record.name)>.qmd``.
    """
    return out_root.joinpath(*record.tree_path) / f"{_slug(record.name)}.qmd"
91+
92+
8893
def _tree_path_from_source(
8994
source_file: Path | None, package_root: Path
9095
) -> tuple[str, ...]:
@@ -255,16 +260,26 @@ def _slug(value: str) -> str:
255260
return re.sub(r"[^A-Za-z0-9_-]+", "-", value).strip("-")
256261

257262

263+
def _relative_link(source: Path, target: Path) -> str:
    """Return a relative link from the page ``source`` to the page ``target``.

    The link is computed relative to the directory containing ``source``
    and always uses ``/`` as the separator, whatever the host OS uses.

    Args:
        source: Path of the page that will contain the link.
        target: Path of the page being linked to.

    Returns:
        The relative path from ``source``'s directory to ``target``.
    """
    relative = os.path.relpath(target, start=source.parent)
    # Links are URLs, not filesystem paths: normalise the OS separator.
    return relative.replace(os.sep, "/")
265+
266+
267+
def _table_cell(value: object) -> str:
    """Format ``value`` as the text of one Markdown pipe-table cell.

    Args:
        value: Any object; ``None`` stands for "no value".

    Returns:
        An empty string for ``None``; otherwise ``str(value)`` with every
        line break replaced by a space and every ``|`` escaped as ``\\|``
        so the cell cannot end the row or open a new column.
    """
    if value is None:
        return ""
    text = str(value)
    # Handle "\r\n" before the single-character forms so a Windows line
    # ending becomes one space and no stray "\r" is left in the cell.
    for line_break in ("\r\n", "\r", "\n"):
        text = text.replace(line_break, " ")
    return text.replace("|", "\\|")
271+
272+
258273
def _write_variables(
    records: list[VariableRecord],
    out_root: Path,
    country: str,
) -> int:
    """Write one reference page per variable record.

    Each page goes to the location given by ``_variable_page_path``; the
    containing directory is created when missing.

    Args:
        records: Variables to emit pages for.
        out_root: Root directory of the generated reference tree.
        country: Country key passed through to ``_render_variable_page``.

    Returns:
        The number of pages written.
    """
    written = 0
    for record in records:
        page_path = _variable_page_path(record, out_root)
        page_path.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the output does not depend on the locale of
        # the machine running the generator.
        page_path.write_text(
            _render_variable_page(record, country), encoding="utf-8"
        )
        written += 1
    return written
@@ -295,36 +310,240 @@ def _write_tree_indices(out_root: Path) -> int:
295310
return written
296311

297312

298-
def _write_programs_index(country: str, out_root: Path) -> int:
313+
def _load_programs(country: str) -> list[dict]:
    """Load the ``programs`` list from a country package's ``programs.yaml``.

    The country package is looked up through ``COUNTRY_MODULES`` and
    imported; ``programs.yaml`` is read from the package directory.

    Args:
        country: Key into ``COUNTRY_MODULES``.

    Returns:
        The value of the top-level ``programs`` key, or an empty list when
        the file is missing, empty, not a mapping, or has no programs.

    Raises:
        KeyError: If ``country`` is not in ``COUNTRY_MODULES``.
    """
    module_name = COUNTRY_MODULES[country]
    country_module = importlib.import_module(module_name)
    package_root = Path(country_module.__file__).parent
    programs_path = package_root / "programs.yaml"
    if not programs_path.exists():
        return []
    with programs_path.open(encoding="utf-8") as f:
        registry = yaml.safe_load(f)
    # safe_load returns None for an empty document; anything that is not a
    # mapping cannot carry a "programs" key either.
    if not isinstance(registry, dict):
        return []
    # "programs:" with no value parses to None, so `or []` covers that too.
    return registry.get("programs") or []
323+
324+
325+
def _program_page_path(program: dict, out_root: Path) -> Path:
    """Return the ``.qmd`` path of a program's generated page.

    The file name is the slug of the program's ``id``, falling back to its
    ``name`` and finally to the literal ``"program"``.

    Args:
        program: One entry of the ``programs`` list from ``programs.yaml``.
        out_root: Root directory of the generated reference tree.

    Returns:
        ``out_root / "programs" / <slug>.qmd``.
    """
    identifier = program.get("id") or program.get("name") or "program"
    # _slug can return "" (for example for an identifier made only of
    # punctuation); fall back so the page is never named just ".qmd".
    slug = _slug(str(identifier)) or "program"
    return out_root / "programs" / f"{slug}.qmd"
328+
329+
330+
def _program_title(program: dict) -> str:
    """Return the display title of a program.

    Preference order is ``full_name``, then ``name``, then ``id``. When
    none of them is set the title is ``"program"``, matching the fallback
    used for the page file name, rather than the string ``"None"``.

    Args:
        program: One entry of the ``programs`` list from ``programs.yaml``.

    Returns:
        The chosen title as a string.
    """
    return str(
        program.get("full_name")
        or program.get("name")
        or program.get("id")
        or "program"
    )
332+
333+
334+
def _program_variable_records(
    program: dict,
    records: list[VariableRecord],
) -> list[VariableRecord]:
    """Select the variable records that belong to a program.

    A record is selected when its name equals the program's ``variable``
    (the root variable) or when its ``tree_path`` starts with the segments
    of the program's ``parameter_prefix`` (split on ``.`` or ``/``).

    Args:
        program: One entry of the ``programs`` list from ``programs.yaml``.
        records: All variable records emitted in this run.

    Returns:
        The selected records with the root variable first, the rest
        ordered by tree path and then by name.
    """
    root_variable = program.get("variable")
    parameter_prefix = program.get("parameter_prefix")
    # Drop empty segments so a trailing or doubled separator
    # ("gov.hhs.chip.", "gov//hhs") still matches the intended directory.
    prefix_parts = (
        tuple(
            part
            for part in str(parameter_prefix).replace("/", ".").split(".")
            if part
        )
        if parameter_prefix
        else ()
    )

    def _is_root(record) -> bool:
        return bool(root_variable) and record.name == root_variable

    def _under_prefix(record) -> bool:
        return (
            bool(prefix_parts)
            and record.tree_path[: len(prefix_parts)] == prefix_parts
        )

    selected = [
        record for record in records if _is_root(record) or _under_prefix(record)
    ]
    return sorted(
        selected,
        key=lambda record: (
            0 if _is_root(record) else 1,
            "/".join(record.tree_path),
            record.name,
        ),
    )
361+
362+
363+
def _render_program_variable_link(
    record: VariableRecord,
    record_pages: dict[str, Path],
    page_path: Path,
) -> str:
    """Render a variable name as Markdown, linked to its page when known.

    Args:
        record: Variable to render.
        record_pages: Map from variable name to that variable's page path.
        page_path: Path of the page the link will be embedded in; the link
            is made relative to it.

    Returns:
        ``[`name`](relative/path)`` when ``record.name`` has an entry in
        ``record_pages``, otherwise just the back-quoted name.
    """
    target = record_pages.get(record.name)
    if target is None:
        return f"`{record.name}`"
    return f"[`{record.name}`]({_relative_link(page_path, target)})"
372+
373+
374+
def _render_program_page(
    program: dict,
    records: list[VariableRecord],
    record_pages: dict[str, Path],
    out_root: Path,
) -> str:
    """Render the Quarto source of one program page.

    The page consists of YAML front matter (title, optional subtitle with
    the program id), a metadata table, an optional "Notes" section, and an
    "Implementation variables" table linking to the variable pages.

    Args:
        program: One entry of the ``programs`` list from ``programs.yaml``.
        records: All variable records emitted in this run.
        record_pages: Map from variable name to that variable's page path.
        out_root: Root directory of the generated reference tree.

    Returns:
        The page text, ending with a newline.
    """
    page_path = _program_page_path(program, out_root)
    title = _program_title(program)
    identifier = str(program.get("id") or "")
    lines: list[str] = [
        "---",
        f'title: "{_escape_yaml_scalar(title)}"',
    ]
    if identifier:
        lines.append(f'subtitle: "`{_escape_yaml_scalar(identifier)}`"')
    lines.extend(["---", ""])

    # Normalise to str once so the membership test and the lookup use the
    # same key even if YAML parsed the value as a non-string scalar.
    raw_root = program.get("variable")
    root_variable = str(raw_root) if raw_root else ""
    root_target = record_pages.get(root_variable) if root_variable else None
    if root_target is not None:
        root_value = (
            f"[`{root_variable}`]"
            f"({_relative_link(page_path, root_target)})"
        )
    elif root_variable:
        root_value = f"`{root_variable}`"
    else:
        root_value = ""

    verified_start_year = program.get("verified_start_year")
    verified_end_year = program.get("verified_end_year")
    if verified_start_year and verified_end_year:
        verified = f"{verified_start_year}-{verified_end_year}"
    elif verified_start_year:
        verified = f"{verified_start_year}+"
    elif verified_end_year:
        verified = f"through {verified_end_year}"
    else:
        verified = ""

    parameter_prefix = program.get("parameter_prefix")
    metadata = [
        ("Program ID", f"`{identifier}`" if identifier else ""),
        ("Category", program.get("category")),
        ("Agency", program.get("agency")),
        ("Status", program.get("status")),
        ("Coverage", program.get("coverage")),
        (
            "State variation",
            "Yes" if program.get("has_state_variation") else "No",
        ),
        ("Verification years", verified),
        (
            "Parameter prefix",
            f"`{parameter_prefix}`" if parameter_prefix else "",
        ),
        ("Root variable", root_value),
    ]
    lines.append("| Field | Value |")
    lines.append("|---|---|")
    for key, value in metadata:
        lines.append(f"| {key} | {_table_cell(value)} |")
    lines.append("")

    if program.get("notes"):
        lines.append("## Notes")
        lines.append("")
        lines.append(str(program["notes"]))
        lines.append("")

    program_records = _program_variable_records(program, records)
    lines.append("## Implementation variables")
    lines.append("")
    if program_records:
        lines.append("| Variable | Label | Entity | Period |")
        lines.append("|---|---|---|---|")
        for record in program_records:
            cells = [
                _render_program_variable_link(record, record_pages, page_path),
                _table_cell(record.label),
                f"`{record.entity}`" if record.entity else "",
                f"`{record.definition_period}`"
                if record.definition_period
                else "",
            ]
            lines.append("| " + " | ".join(cells) + " |")
        lines.append("")
    else:
        lines.append(
            "No implementation variables were emitted for this program in this "
            "reference run."
        )
        lines.append("")

    return "\n".join(lines)
474+
475+
476+
def _write_program_pages(
    programs: list[dict],
    records: list[VariableRecord],
    out_root: Path,
) -> int:
    """Write one page per program under ``out_root / "programs"``.

    Args:
        programs: Entries of the ``programs`` list from ``programs.yaml``.
        records: All variable records emitted in this run; used to link
            each program to its variables.
        out_root: Root directory of the generated reference tree.

    Returns:
        ``len(programs)``, or 0 when there are no programs (in which case
        nothing is created on disk).
    """
    if not programs:
        return 0
    record_pages = {
        record.name: _variable_page_path(record, out_root) for record in records
    }
    program_dir = out_root / "programs"
    program_dir.mkdir(parents=True, exist_ok=True)
    for program in programs:
        page_path = _program_page_path(program, out_root)
        # Pin the encoding so the output does not depend on the locale of
        # the machine running the generator.
        page_path.write_text(
            _render_program_page(program, records, record_pages, out_root),
            encoding="utf-8",
        )
    return len(programs)
494+
495+
496+
def _write_programs_index(
    programs: list[dict],
    records: list[VariableRecord],
    out_root: Path,
) -> int:
    """Write ``programs.qmd``, the program coverage index.

    The index is a single table with one row per program: a link to the
    program page, category, agency, status, coverage, and the root
    variable (linked to its page when one was emitted in this run).

    Args:
        programs: Entries of the ``programs`` list from ``programs.yaml``.
        records: All variable records emitted in this run.
        out_root: Root directory of the generated reference tree.

    Returns:
        1 when the index was written, 0 when there are no programs.
    """
    if not programs:
        return 0
    record_pages = {
        record.name: _variable_page_path(record, out_root) for record in records
    }
    programs_index_path = out_root / "programs.qmd"
    lines: list[str] = [
        "---",
        'title: "Program coverage"',
        'description: "Programs modeled in the country model, generated from programs.yaml."',
        "---",
        "",
        "| Program | Category | Agency | Status | Coverage | Root variable |",
        "|---|---|---|---|---|---|",
    ]
    for program in programs:
        page_path = _program_page_path(program, out_root)
        program_link = (
            f"[{_program_title(program)}]"
            f"({_relative_link(programs_index_path, page_path)})"
        )
        # Normalise to str once so the membership test and the lookup use
        # the same key even if YAML parsed the value as a non-string scalar.
        raw_root = program.get("variable")
        root_variable = str(raw_root) if raw_root else ""
        root_target = record_pages.get(root_variable) if root_variable else None
        if root_target is not None:
            root_value = (
                f"[`{root_variable}`]"
                f"({_relative_link(programs_index_path, root_target)})"
            )
        elif root_variable:
            root_value = f"`{root_variable}`"
        else:
            root_value = ""
        cells = [
            _table_cell(program_link),
            _table_cell(program.get("category")),
            _table_cell(program.get("agency")),
            _table_cell(program.get("status")),
            _table_cell(program.get("coverage")),
            _table_cell(root_value),
        ]
        lines.append("| " + " | ".join(cells) + " |")
    # With no variable records nothing else may have created out_root yet.
    programs_index_path.parent.mkdir(parents=True, exist_ok=True)
    programs_index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return 1
329548

330549

@@ -344,11 +563,13 @@ def build_reference(
344563
or needle in " ".join(str(p).lower() for p in r.tree_path)
345564
]
346565
variables_written = _write_variables(records, out_root, country)
347-
programs_written = _write_programs_index(country, out_root)
566+
programs = _load_programs(country)
567+
program_pages_written = _write_program_pages(programs, records, out_root)
568+
programs_index_written = _write_programs_index(programs, records, out_root)
348569
indices_written = _write_tree_indices(out_root)
349570
return {
350571
"variables": variables_written,
351-
"programs": programs_written,
572+
"programs": program_pages_written + programs_index_written,
352573
"indices": indices_written,
353574
}
354575

@@ -380,7 +601,7 @@ def main() -> None:
380601
args = _parse_args()
381602
stats = build_reference(args.country, args.out, args.filter)
382603
logger.info(
383-
"Wrote %d variable pages, %d programs page, %d directory indices to %s",
604+
"Wrote %d variable pages, %d program pages, %d directory indices to %s",
384605
stats["variables"],
385606
stats["programs"],
386607
stats["indices"],

docs/_quarto.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ website:
5151
- programs/us-chip.md
5252
- section: "Reference"
5353
contents:
54+
- reference/index.md
5455
- countries.md
5556
- release-bundles.md
5657
- data-publishing-design.md

0 commit comments

Comments
 (0)