Skip to content

Commit 6375817

Browse files
authored
chore: subject-case sync extension — commitizen + pr-title (#128, #154) (#73)
1 parent 461cab6 commit 6375817

4 files changed

Lines changed: 410 additions & 48 deletions

File tree

.github/scripts/check_commit_types.py

Lines changed: 157 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
#!/usr/bin/env python3
2-
"""Verify the commit-type allowlist stays in sync across two configs.
2+
"""Verify the commit-type allowlist + subject-case rule stay in sync.
33
4-
Seven prefixes are allowed on commits and PR titles: feat, fix, docs,
5-
test, refactor, chore, release. Two places enforce that list today:
4+
Two configs hand-encode the same conventional-commit policy:
65
76
1. ``[tool.commitizen].customize.schema_pattern`` in ``pyproject.toml`` —
87
the commitizen regex (commit-msg hook, local).
98
2. ``.github/workflows/pr-title.yml`` ``types:`` input to the
10-
``amannn/action-semantic-pull-request`` step — the PR-title CI check.
9+
``amannn/action-semantic-pull-request`` step plus its ``subjectPattern``
10+
— the PR-title CI check.
1111
1212
Both are hand-maintained. Add a type in one, forget the other, and the
1313
two layers drift: commits fail locally but PR titles pass (or vice
1414
versa). ``docs/DEVELOPMENT.md`` explicitly warns these must stay in
1515
sync, but prose warnings drift too.
1616
17-
This script mirrors the ``check_required_contexts.py`` pattern from #72
18-
for this second drift class. Fails CI when the two sets disagree in
19-
either direction.
17+
This script enforces sync on **two axes**:
18+
19+
- **Type allowlist** — the seven prefixes (feat, fix, docs, test,
20+
refactor, chore, release). Mirrors the ``check_required_contexts.py``
21+
pattern from #72.
22+
- **Subject-case rule** — the negative-lookahead constraint that rejects
23+
Title-Case subjects (``feat: Add thing`` → reject; ``feat: add thing``
24+
/ ``feat: CI failure`` → accept). Added in #128 so commitizen rejects
25+
Title Case at commit-msg time, not just at the CI gate.
26+
27+
Fails CI when either axis disagrees in either direction.
2028
2129
Usage (from repo root):
2230
@@ -48,6 +56,19 @@
4856
# extraction when a future type contained digits or hyphens.
4957
_SCHEMA_ALTERNATION_RE = re.compile(r"\^\(([a-z0-9\-|]+)\)")
5058

59+
# Matches the subject-case constraint between `:\s` and the trailing `.+`
60+
# in the commitizen schema_pattern. Tolerates three shapes seen across
61+
# revisions:
62+
# :\s — original #128 shape (single-space, susceptible to the
63+
# `feat:  Add thing` double-space bypass: `\s` consumes the first
# space, so the lookahead inspects the second space, not `A`).
64+
# :\s+ — naive widening (still backtracks on Title-Case input).
65+
# :\s++ — possessive quantifier (#154); the schema we want long-term
66+
# because it forbids the lookahead-bypass via backtracking.
67+
# All three encode the same "after `:` then whitespace, then this lookahead"
68+
# semantics; the regex captures the lookahead chunk regardless.
69+
# Group 1 captures "" if no subject constraint is present (commitizen pre-#128 shape).
70+
_SCHEMA_SUBJECT_RE = re.compile(r":\\s\+{0,2}(.*?)\.\+$")
71+
5172

5273
def commitizen_types() -> set[str]:
5374
"""Return the set of types allowed by the commitizen schema regex."""
@@ -83,28 +104,87 @@ def commitizen_types() -> set[str]:
83104
return types
84105

85106

107+
def commitizen_subject_pattern() -> str:
108+
"""Extract the subject-case constraint from commitizen's schema_pattern.
109+
110+
The schema_pattern shape (post-#128):
111+
^(feat|fix|...)(\\([\\w\\-]+\\))?!?:\\s(?![A-Z][a-z]).+
112+
113+
Returns the chunk between ``:\\s`` and the trailing ``.+`` — i.e. the
114+
negative-lookahead constraint on the subject. Returns "" when no
115+
subject constraint is present (commitizen pre-#128 shape).
116+
"""
117+
data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
118+
schema: str = (
119+
data.get("tool", {})
120+
.get("commitizen", {})
121+
.get("customize", {})
122+
.get("schema_pattern", "")
123+
)
124+
if not schema:
125+
# A missing schema_pattern is the same error commitizen_types() raises, and
# main() calls that first — so return "" here instead of raising again.
126+
return ""
127+
match = _SCHEMA_SUBJECT_RE.search(schema)
128+
if not match:
129+
return ""
130+
return match.group(1)
131+
132+
86133
def pr_title_types() -> set[str]:
87134
"""Return the set of types declared in the pr-title workflow."""
135+
return _pr_title_field("types", _parse_types) # type: ignore[return-value]
136+
137+
138+
def pr_title_subject_pattern() -> str:
139+
"""Return the subject-case constraint declared in the pr-title workflow.
140+
141+
Strips the leading ``^`` anchor and the trailing ``.+$`` from the
142+
YAML ``subjectPattern`` field so the comparison with commitizen's
143+
constraint is normalised. Returns "" when the field is absent.
144+
"""
145+
raw: str = _pr_title_field("subjectPattern", lambda v: v or "", required=False) # type: ignore[assignment]
146+
if not raw:
147+
return ""
148+
pattern = re.sub(r"^\^", "", raw)
149+
pattern = re.sub(r"\.\+\$$", "", pattern)
150+
return pattern
151+
152+
153+
def _parse_types(value: str) -> set[str]:
154+
"""Parse the YAML ``types`` field (newline-separated string) into a set."""
155+
types = {line.strip() for line in value.splitlines() if line.strip()}
156+
if not types:
157+
msg = (
158+
f"`types:` block in {PR_TITLE_YML} is empty or "
159+
"whitespace-only. Expected at least one commit type per line."
160+
)
161+
raise ValueError(msg)
162+
return types
163+
164+
165+
def _pr_title_field(
166+
name: str,
167+
parse: object,
168+
*,
169+
required: bool = True,
170+
) -> object:
171+
"""Extract a single field from the action-semantic-pull-request step."""
88172
data = yaml.safe_load(PR_TITLE_YML.read_text(encoding="utf-8"))
89173
for job in data.get("jobs", {}).values():
90174
for step in job.get("steps", []):
91175
uses = step.get("uses", "")
92176
if "action-semantic-pull-request" in uses:
93-
types_block: str = step.get("with", {}).get("types", "")
94-
types = {
95-
line.strip() for line in types_block.splitlines() if line.strip()
96-
}
97-
# An empty or whitespace-only `types:` block would return an
98-
# empty set and trivially match an empty commitizen set —
99-
# masking a real config error. Fail loudly instead (#92).
100-
if not types:
101-
msg = (
102-
f"`types:` block in {PR_TITLE_YML} is empty or "
103-
"whitespace-only. Expected at least one commit type "
104-
"per line."
105-
)
106-
raise ValueError(msg)
107-
return types
177+
value = step.get("with", {}).get(name)
178+
if value is None:
179+
if required:
180+
msg = (
181+
f"`with.{name}` not found in the "
182+
"action-semantic-pull-request step. Update this "
183+
"script if the action's input names changed."
184+
)
185+
raise ValueError(msg)
186+
return ""
187+
return parse(value) # type: ignore[operator]
108188
msg = (
109189
"Could not find an `amannn/action-semantic-pull-request` step in "
110190
f"{PR_TITLE_YML}. If the action was renamed or the file moved, "
@@ -114,38 +194,70 @@ def pr_title_types() -> set[str]:
114194

115195

116196
def main() -> int:
117-
cz = commitizen_types()
118-
pr = pr_title_types()
197+
cz_types = commitizen_types()
198+
pr_types = pr_title_types()
199+
cz_subject = commitizen_subject_pattern()
200+
pr_subject = pr_title_subject_pattern()
201+
202+
failed = False
119203

120204
# Belt-and-braces safety net: both extractors raise on empty, but guard
121205
# against a future refactor that drops the raise (#92).
122-
if not cz or not pr:
206+
if not cz_types or not pr_types:
123207
print(
124208
"::error::One or both extractors returned empty; sync check cannot "
125-
"proceed. commitizen_types() empty: "
126-
f"{not cz}; pr_title_types() empty: {not pr}."
209+
f"proceed. commitizen_types() empty: {not cz_types}; "
210+
f"pr_title_types() empty: {not pr_types}."
127211
)
128212
return 1
129213

130-
if cz == pr:
131-
print(f"Commit types in sync ({len(cz)} types): {sorted(cz)}")
132-
return 0
214+
if cz_types == pr_types:
215+
print(f"Commit types in sync ({len(cz_types)} types): {sorted(cz_types)}")
216+
else:
217+
failed = True
218+
print(
219+
"::error::[tool.commitizen].customize.schema_pattern and "
220+
".github/workflows/pr-title.yml types are out of sync"
221+
)
222+
for name in sorted(cz_types - pr_types):
223+
print(f"::error:: + in commitizen only: {name!r}")
224+
for name in sorted(pr_types - cz_types):
225+
print(f"::error:: - in pr-title.yml only: {name!r}")
226+
print(
227+
"\nFix: update both the schema_pattern in pyproject.toml AND "
228+
"the `types` list in .github/workflows/pr-title.yml so they "
229+
"contain the same type names. See docs/DEVELOPMENT.md#commit-messages."
230+
)
231+
232+
if cz_subject == pr_subject:
233+
if cz_subject:
234+
print(f"Subject-case constraint in sync: {cz_subject!r}")
235+
else:
236+
# Both empty — older shape, before #128's subject-case landed in
237+
# commitizen. Don't fail here; the `Lint PR title` workflow remains
238+
# the single layer if commitizen drops back. Surface as a warning.
239+
print(
240+
"::warning::Both commitizen and pr-title.yml have empty "
241+
"subject-case constraints. Per docs/DEVELOPMENT.md the rule "
242+
"should be enforced at both layers — re-add `(?![A-Z][a-z])` "
243+
"to commitizen's schema_pattern after `:\\s`."
244+
)
245+
else:
246+
failed = True
247+
print(
248+
"::error::commitizen schema_pattern subject-case constraint "
249+
"and pr-title.yml `subjectPattern` are out of sync"
250+
)
251+
print(f"::error:: commitizen extracted: {cz_subject!r}")
252+
print(f"::error:: pr-title.yml extracted: {pr_subject!r}")
253+
print(
254+
"\nFix: keep both regexes equivalent after stripping anchors. "
255+
"Commitizen's chunk lives between `:\\s` and `.+` in "
256+
"schema_pattern; pr-title.yml's lives in the `subjectPattern` "
257+
"field stripped of `^` and `.+$`."
258+
)
133259

134-
print(
135-
"::error::[tool.commitizen].customize.schema_pattern and "
136-
".github/workflows/pr-title.yml types are out of sync"
137-
)
138-
for name in sorted(cz - pr):
139-
print(f"::error:: + in commitizen only: {name!r}")
140-
for name in sorted(pr - cz):
141-
print(f"::error:: - in pr-title.yml only: {name!r}")
142-
print(
143-
"\nFix: update both the schema_pattern in pyproject.toml AND "
144-
"the `types` list in .github/workflows/pr-title.yml so they "
145-
"contain the same type names. See docs/DEVELOPMENT.md#commit-messages "
146-
"for the current allowed list."
147-
)
148-
return 1
260+
return 1 if failed else 0
149261

150262

151263
if __name__ == "__main__":

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "harness-python-react"
3-
version = "0.2.2"
3+
version = "0.2.3"
44
description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
55
readme = "README.md"
66
requires-python = ">=3.14"
@@ -165,7 +165,7 @@ name = "cz_customize"
165165

166166
[tool.commitizen.customize]
167167
schema = "<type>(<scope>): <subject>"
168-
schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s.+'
168+
schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s++(?![A-Z][a-z]).+'
169169
bump_pattern = '^(feat|fix|refactor)'
170170
bump_map = {feat = "MINOR", fix = "PATCH", refactor = "PATCH", chore = "PATCH", docs = "PATCH", test = "PATCH"}
171171
example = "feat: add an example feature"

0 commit comments

Comments
 (0)