Skip to content

Commit b2ba187

Browse files
committed
ci: gate docstring quality and coverage in CI (#616)
Add a hard-fail docstring quality gate to the docs-publish workflow:
- New 'Docstring quality gate' step runs --quality --fail-on-quality --threshold 100; it fails if any quality issue is found or coverage drops below 100% (both currently pass in CI)
- Existing audit_coverage step (soft-fail, threshold 80) is retained for the summary coverage metric

Add typeddict_mismatch checks to audit_coverage.py:
- typeddict_phantom: the Attributes: section documents a field not declared in the TypedDict
- typeddict_undocumented: a declared field is absent from the Attributes: section
- Mirrors the existing param_mismatch logic for functions

Pre-commit: enable --fail-on-quality on the manual-stage hook (CI is the hard gate; the hook remains stages: [manual] because docs must be pre-built).

Update CONTRIBUTING.md and docs/docs/guide/CONTRIBUTING.md with TypedDict docstring requirements and the two new audit check kinds.
1 parent 46935a8 commit b2ba187

5 files changed

Lines changed: 100 additions & 35 deletions

File tree

.github/workflows/docs-publish.yml

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,17 @@ jobs:
105105
id: audit_coverage
106106
run: |
107107
set -o pipefail
108-
uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 --quality 2>&1 \
108+
uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 2>&1 \
109109
| tee /tmp/audit_coverage.log
110110
continue-on-error: ${{ inputs.strict_validation != true }}
111111

112+
- name: Docstring quality gate
113+
id: quality_gate
114+
run: |
115+
set -o pipefail
116+
uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --quality --fail-on-quality --threshold 100 2>&1 \
117+
| tee /tmp/quality_gate.log
118+
112119
# -- Upload artifact for deploy job --------------------------------------
113120

114121
- name: Upload docs artifact
@@ -141,12 +148,14 @@ jobs:
141148
markdownlint_outcome = "${{ steps.markdownlint.outcome }}"
142149
validate_outcome = "${{ steps.validate_mdx.outcome }}"
143150
coverage_outcome = "${{ steps.audit_coverage.outcome }}"
151+
quality_gate_outcome = "${{ steps.quality_gate.outcome }}"
144152
strict = "${{ inputs.strict_validation }}" == "true"
145153
mode = "" if strict else " *(soft-fail)*"
146154
147155
lint_log = read_log("/tmp/markdownlint.log")
148156
validate_log = read_log("/tmp/validate_mdx.log")
149157
coverage_log = read_log("/tmp/audit_coverage.log")
158+
quality_gate_log = read_log("/tmp/quality_gate.log")
150159
151160
# Count markdownlint issues (lines matching file:line:col format)
152161
lint_issues = len([l for l in lint_log.splitlines() if re.match(r'.+:\d+:\d+ ', l)])
@@ -186,27 +195,11 @@ jobs:
186195
187196
mdx_detail = parse_validate_detail(validate_log)
188197
189-
# Docstring quality annotation emitted by audit_coverage.py into the log
198+
# Parse docstring quality annotation from quality gate log
190199
# Format: ::notice title=Docstring quality::message
191-
# or ::warning title=Docstring quality::message
192-
quality_match = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", coverage_log)
193-
if quality_match:
194-
quality_level, quality_msg = quality_match.group(1), quality_match.group(2)
195-
quality_icon = "✅" if quality_level == "notice" else "⚠️"
196-
quality_status = "pass" if quality_level == "notice" else "warning"
197-
quality_detail = re.sub(r"\s*—\s*see job summary.*$", "", quality_msg)
198-
quality_row = f"| Docstring Quality | {quality_icon} {quality_status}{mode} | {quality_detail} |"
199-
else:
200-
quality_row = None
201-
202-
# Split coverage log at quality section to avoid duplicate output in collapsibles
203-
quality_start = coverage_log.find("🔬 Running docstring quality")
204-
if quality_start != -1:
205-
quality_log = coverage_log[quality_start:]
206-
coverage_display_log = coverage_log[:quality_start].strip()
207-
else:
208-
quality_log = ""
209-
coverage_display_log = coverage_log
200+
# or ::error title=Docstring quality::message
201+
quality_gate_match = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", quality_gate_log)
202+
quality_gate_detail = re.sub(r"\s*—\s*see job summary.*$", "", quality_gate_match.group(2)) if quality_gate_match else ""
210203
211204
lines = [
212205
"## Docs Build — Validation Summary\n",
@@ -215,16 +208,15 @@ jobs:
215208
f"| Markdownlint | {icon(markdownlint_outcome)} {markdownlint_outcome}{mode} | {lint_detail} |",
216209
f"| MDX Validation | {icon(validate_outcome)} {validate_outcome}{mode} | {mdx_detail} |",
217210
f"| API Coverage | {icon(coverage_outcome)} {coverage_outcome}{mode} | {cov_detail} |",
211+
f"| Docstring Quality | {icon(quality_gate_outcome)} {quality_gate_outcome} | {quality_gate_detail} |",
218212
]
219-
if quality_row:
220-
lines.append(quality_row)
221213
lines.append("")
222214
223215
for title, log, limit in [
224216
("Markdownlint output", lint_log, 5_000),
225217
("MDX validation output", validate_log, 5_000),
226-
("API coverage output", coverage_display_log, 5_000),
227-
("Docstring quality details", quality_log, 1_000_000),
218+
("API coverage output", coverage_log, 5_000),
219+
("Docstring quality details", quality_gate_log, 1_000_000),
228220
]:
229221
if log:
230222
lines += [

.pre-commit-config.yaml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,12 @@ repos:
5151
language: system
5252
pass_filenames: false
5353
files: (docs/docs/.*\.mdx$|tooling/docs-autogen/)
54-
# TODO(#616): Move to normal commit flow once docstring quality issues reach 0.
55-
# Griffe loads the full package (~10s), so this is manual-only for now to avoid
56-
# slowing down every Python commit. Re-enable (remove stages: [manual]) and add
57-
# --fail-on-quality once quality issues are resolved.
54+
# Docstring quality gate — manual only (CI is the hard gate via docs-publish.yml).
55+
# Run locally with: pre-commit run docs-docstring-quality --hook-stage manual
56+
# Requires generated API docs (run `uv run python tooling/docs-autogen/build.py` first).
5857
- id: docs-docstring-quality
59-
name: Audit docstring quality (informational)
60-
entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --docs-dir docs/docs/api || true'
58+
name: Audit docstring quality
59+
entry: uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --fail-on-quality --threshold 0 --docs-dir docs/docs/api
6160
language: system
6261
pass_filenames: false
6362
files: (mellea/.*\.py$|cli/.*\.py$)

CONTRIBUTING.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,25 @@ differs in type or behaviour from the constructor input — for example, when a
174174
argument is wrapped into a `CBlock`, or when a class-level constant is relevant to
175175
callers. Pure-echo entries that repeat `Args:` verbatim should be omitted.
176176

177+
**`TypedDict` classes are a special case.** Their fields *are* the entire public
178+
contract, so when an `Attributes:` section is present it must exactly match the
179+
declared fields. The audit will flag:
180+
181+
- `typeddict_phantom` — `Attributes:` documents a field that is not declared in the `TypedDict`
182+
- `typeddict_undocumented` — a declared field is absent from the `Attributes:` section
183+
184+
```python
185+
class ConstraintResult(TypedDict):
186+
"""Result of a constraint check.
187+
188+
Attributes:
189+
passed: Whether the constraint was satisfied.
190+
reason: Human-readable explanation.
191+
"""
192+
passed: bool
193+
reason: str
194+
```
195+
177196
#### Validating docstrings
178197

179198
Run the coverage and quality audit to check your changes before committing:
@@ -194,6 +213,8 @@ Key checks the audit enforces:
194213
| `no_args` | Standalone function has params but no `Args:` section |
195214
| `no_returns` | Function has a non-trivial return annotation but no `Returns:` section |
196215
| `param_mismatch` | `Args:` documents names not present in the actual signature |
216+
| `typeddict_phantom` | `TypedDict` `Attributes:` documents a field not declared in the class |
217+
| `typeddict_undocumented` | `TypedDict` has a declared field absent from its `Attributes:` section |
197218

198219
**IDE hover verification** — open any of these existing classes in VS Code and hover
199220
over the class name or a constructor call to confirm the hover card shows `Args:` once

docs/docs/guide/CONTRIBUTING.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,11 @@ Add `Attributes:` only when a stored value differs in type or behaviour from the
353353
input (e.g. a `str` wrapped into a `CBlock`, or a class-level constant).
354354
Pure-echo entries that repeat `Args:` verbatim should be omitted.
355355

356+
**`TypedDict` classes** are a special case — their fields are the entire public contract,
357+
so when an `Attributes:` section is present it must exactly match the declared fields.
358+
The CI audit will fail on phantom fields (documented but not declared) and undocumented
359+
fields (declared but missing from `Attributes:`).
360+
356361
See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the full validation workflow.
357362

358363
---

tooling/docs-autogen/audit_coverage.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def walk_module(module, module_path: str):
102102
# ---------------------------------------------------------------------------
103103

104104
_ARGS_RE = re.compile(r"^\s*(Args|Arguments|Parameters)\s*:", re.MULTILINE)
105+
_TYPEDDICT_BASES = re.compile(r"\bTypedDict\b")
105106
_RETURNS_RE = re.compile(r"^\s*Returns\s*:", re.MULTILINE)
106107
_YIELDS_RE = re.compile(r"^\s*Yields\s*:", re.MULTILINE)
107108
_RAISES_RE = re.compile(r"^\s*Raises\s*:", re.MULTILINE)
@@ -274,6 +275,45 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]:
274275
}
275276
)
276277

278+
# TypedDict field mismatch check.
279+
# Unlike regular classes (where Attributes: is optional under Option C),
280+
# TypedDict fields *are* the entire public contract. When an Attributes:
281+
# section exists, every entry must match an actual declared field and every
282+
# declared field must appear — stale or missing entries are always a bug.
283+
is_typeddict = any(
284+
_TYPEDDICT_BASES.search(str(base))
285+
for base in getattr(member, "bases", [])
286+
)
287+
if is_typeddict and _ATTRIBUTES_RE.search(doc_text):
288+
attrs_block = re.search(
289+
r"Attributes\s*:(.*?)(?:\n\s*\n|\Z)", doc_text, re.DOTALL
290+
)
291+
if attrs_block:
292+
doc_field_names = set(_ARGS_ENTRY_RE.findall(attrs_block.group(1)))
293+
actual_fields = {
294+
name
295+
for name, m in member.members.items()
296+
if not name.startswith("_") and getattr(m, "is_attribute", False)
297+
}
298+
phantom = doc_field_names - actual_fields
299+
if phantom:
300+
issues.append(
301+
{
302+
"path": full_path,
303+
"kind": "typeddict_phantom",
304+
"detail": f"Attributes: documents {sorted(phantom)} not declared in TypedDict",
305+
}
306+
)
307+
undocumented = actual_fields - doc_field_names
308+
if undocumented:
309+
issues.append(
310+
{
311+
"path": full_path,
312+
"kind": "typeddict_undocumented",
313+
"detail": f"TypedDict fields {sorted(undocumented)} missing from Attributes: section",
314+
}
315+
)
316+
277317
return issues
278318

279319

@@ -296,11 +336,15 @@ def audit_docstring_quality(
296336
- no_class_args: class whose __init__ has typed params but no Args section on the class
297337
- duplicate_init_args: Args: present in both class docstring and __init__ (Option C violation)
298338
- param_mismatch: Args section documents names absent from the real signature
339+
- typeddict_phantom: TypedDict Attributes: section documents fields not declared in the class
340+
- typeddict_undocumented: TypedDict has declared fields absent from its Attributes: section
299341
300-
Note: Attributes: sections are intentionally not enforced. Under the Option C
301-
convention, Attributes: is only used when stored values differ in type or
302-
behaviour from the constructor inputs (e.g. type transforms, computed values,
303-
class constants). Pure-echo entries that repeat Args: verbatim are omitted.
342+
Note: Attributes: sections are intentionally not enforced for regular classes. Under
343+
the Option C convention, Attributes: is only used when stored values differ in type or
344+
behaviour from the constructor inputs (e.g. type transforms, computed values, class
345+
constants). Pure-echo entries that repeat Args: verbatim are omitted. TypedDicts are
346+
a carve-out: their fields are the entire public contract, so when an Attributes:
347+
section is present it must exactly match the declared fields.
304348
305349
Only symbols (and methods whose parent class is) present in `documented` are
306350
checked when that set is provided — ensuring the audit is scoped to what is
@@ -401,6 +445,8 @@ def _print_quality_report(issues: list[dict]) -> None:
401445
"no_class_args": "Missing class Args section",
402446
"duplicate_init_args": "Duplicate Args: in class + __init__ (Option C violation)",
403447
"param_mismatch": "Param name mismatches (documented but not in signature)",
448+
"typeddict_phantom": "TypedDict phantom fields (documented but not declared)",
449+
"typeddict_undocumented": "TypedDict undocumented fields (declared but missing from Attributes:)",
404450
}
405451

406452
total = len(issues)
@@ -419,6 +465,8 @@ def _print_quality_report(issues: list[dict]) -> None:
419465
"no_class_args",
420466
"duplicate_init_args",
421467
"param_mismatch",
468+
"typeddict_phantom",
469+
"typeddict_undocumented",
422470
):
423471
items = by_kind.get(kind, [])
424472
if not items:

0 commit comments

Comments
 (0)