Skip to content

Commit e97832a

Browse files
authored
Synchronize data release version guard (#1013)
1 parent 9ad7883 commit e97832a

7 files changed

Lines changed: 176 additions & 2 deletions

File tree

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""Require pyproject.toml to track the latest finalized HF data release."""
2+
3+
from __future__ import annotations
4+
5+
import argparse
6+
import json
7+
import os
8+
from pathlib import Path
9+
import re
10+
import sys
11+
from urllib.error import URLError
12+
from urllib.request import urlopen
13+
14+
15+
# Repository root, resolved as the third ancestor of this script's path.
REPO_ROOT = Path(__file__).resolve().parents[2]

# Default location of version_manifest.json in the Hugging Face data repo.
DEFAULT_VERSION_MANIFEST_URL = (
    "https://huggingface.co/policyengine/policyengine-us-data/"
    "resolve/main/version_manifest.json"
)

# A line-initial `version = "..."` assignment; the quoted value is captured.
VERSION_RE = re.compile(r'^version\s*=\s*"([^"]+)"', re.MULTILINE)

# MAJOR.MINOR.PATCH with an optional, uncaptured `rcN` suffix.
SEMVER_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)(?:rc\d+)?$")
22+
23+
24+
def stable_version_tuple(version: str) -> tuple[int, int, int]:
    """Return the numeric ``(major, minor, patch)`` base of *version*.

    The string must match ``SEMVER_RE``; an ``rcN`` suffix is accepted by
    that pattern but is not part of the returned tuple.

    Raises:
        ValueError: If *version* does not match ``SEMVER_RE``.
    """
    parsed = SEMVER_RE.match(version)
    if parsed is None:
        raise ValueError(f"Unsupported version format: {version}")
    major, minor, patch = (int(component) for component in parsed.groups())
    return major, minor, patch
29+
30+
31+
def pyproject_version(root: Path = REPO_ROOT) -> str:
    """Return the project version declared in ``<root>/pyproject.toml``.

    The value is taken from the first line-initial ``version = "..."``
    assignment found by ``VERSION_RE``.

    Args:
        root: Directory that contains ``pyproject.toml``.

    Returns:
        The quoted version string, without the quotes.

    Raises:
        OSError: If ``pyproject.toml`` cannot be read.
        ValueError: If no version assignment is found, or the file is not
            valid UTF-8 (``UnicodeDecodeError`` is a ``ValueError``).
    """
    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's locale encoding.
    text = (root / "pyproject.toml").read_text(encoding="utf-8")
    match = VERSION_RE.search(text)
    if match is None:
        raise ValueError("Could not find project version in pyproject.toml")
    return match.group(1)
37+
38+
39+
def latest_hf_release_version(
    url: str = DEFAULT_VERSION_MANIFEST_URL,
) -> str:
    """Return the latest release version reported by the version manifest.

    The manifest's top-level ``current`` string is preferred. When it is
    missing or empty, the ``version`` field of the last entry in the
    ``versions`` list is used instead.

    Args:
        url: Location of ``version_manifest.json``.

    Returns:
        The non-empty version string found in the manifest.

    Raises:
        urllib.error.URLError: If the manifest cannot be fetched.
        ValueError: If the body is not valid JSON (``JSONDecodeError`` is a
            ``ValueError``), is not a JSON object, or carries no usable
            version.
    """
    with urlopen(url, timeout=30) as response:
        payload = json.load(response)

    # Report unexpected shapes as ValueError. main() handles only
    # URLError/OSError/ValueError, so an AttributeError from calling ``.get``
    # on a non-dict would escape as a traceback even in ``--mode warn``.
    if not isinstance(payload, dict):
        raise ValueError("HF version_manifest.json is not a JSON object")

    current = payload.get("current")
    if isinstance(current, str) and current:
        return current

    versions = payload.get("versions")
    if not isinstance(versions, list) or not versions:
        raise ValueError("HF version_manifest.json has no current version")

    last_entry = versions[-1]
    latest = last_entry.get("version") if isinstance(last_entry, dict) else None
    if not isinstance(latest, str) or not latest:
        raise ValueError("HF version_manifest.json latest entry has no version")
    return latest
54+
55+
56+
def version_violations(
    *,
    package_version: str,
    finalized_release_version: str,
) -> list[str]:
    """List the problems found when comparing the two versions.

    Both versions are reduced with ``stable_version_tuple`` before being
    compared, so only their numeric bases take part in the comparison.

    Returns:
        An empty list when the package base is at or ahead of the release
        base; otherwise a single-element list with a human-readable message.

    Raises:
        ValueError: Propagated from ``stable_version_tuple`` for an
            unsupported version string.
    """
    package_base = stable_version_tuple(package_version)
    release_base = stable_version_tuple(finalized_release_version)
    if package_base < release_base:
        message = (
            f"pyproject.toml version {package_version} is behind finalized HF "
            f"data release {finalized_release_version}. Finalize the package "
            "version before creating another publication candidate."
        )
        return [message]
    return []
71+
72+
73+
def check_repository(
    root: Path = REPO_ROOT,
    *,
    finalized_release_version: str | None = None,
    version_manifest_url: str = DEFAULT_VERSION_MANIFEST_URL,
) -> list[str]:
    """Compare the checkout's package version with the finalized release.

    Args:
        root: Directory that contains ``pyproject.toml``.
        finalized_release_version: Release version to compare against. When
            it is ``None`` or empty, the version is fetched from
            *version_manifest_url*.
        version_manifest_url: Manifest location used for that fetch.

    Returns:
        The list produced by ``version_violations`` (empty when current).
    """
    package_version = pyproject_version(root)
    # Only reach for the network when the caller did not supply a version.
    if not finalized_release_version:
        finalized_release_version = latest_hf_release_version(version_manifest_url)
    return version_violations(
        package_version=package_version,
        finalized_release_version=finalized_release_version,
    )
87+
88+
89+
def main(argv: list[str] | None = None) -> int:
    """Run the version check from the command line and return an exit code.

    Options:
        --mode: ``fail`` (default) returns 1 for a stale version or a check
            that could not be completed; ``warn`` reports the same problems
            on stderr but returns 0.
        --version-manifest-url: Manifest location; defaults to the
            ``US_DATA_VERSION_MANIFEST_URL`` environment variable, then to
            ``DEFAULT_VERSION_MANIFEST_URL``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 when the version is current or the mode is ``warn``; otherwise 1.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--mode",
        choices=("warn", "fail"),
        default="fail",
        help="Whether stale versions should fail the command.",
    )
    cli.add_argument(
        "--version-manifest-url",
        default=os.environ.get(
            "US_DATA_VERSION_MANIFEST_URL", DEFAULT_VERSION_MANIFEST_URL
        ),
    )
    options = cli.parse_args(argv)

    # Every problem path shares this exit code: 1 in fail mode, 0 in warn mode.
    problem_exit_code = 1 if options.mode == "fail" else 0

    try:
        problems = check_repository(
            version_manifest_url=options.version_manifest_url,
        )
    except (URLError, OSError, ValueError) as exc:
        print(
            f"Could not check finalized HF data release version: {exc}", file=sys.stderr
        )
        return problem_exit_code

    if problems:
        for problem in problems:
            print(problem, file=sys.stderr)
        return problem_exit_code

    print("Data package version is current with the latest finalized HF release.")
    return 0
122+
123+
124+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

.github/workflows/pipeline.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ jobs:
108108
POLICYENGINE_US_ALLOW_STALE: ${{ inputs.allow_stale_policyengine_us }}
109109
run: python .github/scripts/check_policyengine_us_dependency.py --mode fail
110110

111+
- name: Require pyproject.toml to match finalized HF release base
112+
run: python .github/scripts/check_data_release_version.py --mode fail
113+
111114
- name: Deploy and launch pipeline on Modal
112115
env:
113116
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}

.github/workflows/push.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@ jobs:
4141
- name: Require current PolicyEngine US dependency
4242
run: python .github/scripts/check_policyengine_us_dependency.py --mode fail
4343

44+
data-release-version:
45+
name: Data release version
46+
runs-on: ubuntu-latest
47+
steps:
48+
- uses: actions/checkout@v6
49+
- uses: actions/setup-python@v6
50+
with:
51+
python-version: "3.14"
52+
- name: Require pyproject.toml to match finalized HF release base
53+
run: python .github/scripts/check_data_release_version.py --mode fail
54+
4455
# ── Documentation ──────────────────────────────────────────
4556
docs:
4657
name: Documentation
@@ -80,6 +91,7 @@ jobs:
8091
needs:
8192
- run-context
8293
- policyengine-us-freshness
94+
- data-release-version
8395
if: |
8496
github.event.head_commit.message != 'Update publication candidate' &&
8597
github.event.head_commit.message != 'Finalize package version'
@@ -126,6 +138,7 @@ jobs:
126138
- lint
127139
- run-context
128140
- policyengine-us-freshness
141+
- data-release-version
129142
if: github.event.head_commit.message == 'Update publication candidate'
130143
permissions:
131144
actions: write
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure publication candidates fail before launch when the package version lags the latest finalized data release.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "policyengine_us_data"
11-
version = "1.115.2"
11+
version = "1.115.3"
1212
description = "A package to create representative microdata for the US."
1313
readme = "README.md"
1414
authors = [

tests/unit/test_publication_scripts.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,38 @@ def test_policyengine_us_dependency_check_allow_stale_keeps_local_errors_fatal(
348348
assert module.main() == 1
349349

350350

351+
def test_data_release_version_check_passes_at_latest_release(tmp_path):
    """A package version equal to the finalized release yields no violations."""
    script = _load_script(
        ".github/scripts/check_data_release_version.py",
        "check_data_release_version_current_test",
    )
    _write_pyproject(tmp_path, "1.115.3")

    violations = script.check_repository(
        tmp_path,
        finalized_release_version="1.115.3",
    )

    assert violations == []
365+
366+
367+
def test_data_release_version_check_flags_stale_package(tmp_path):
    """A package version behind the finalized release is reported with both versions."""
    script = _load_script(
        ".github/scripts/check_data_release_version.py",
        "check_data_release_version_stale_test",
    )
    _write_pyproject(tmp_path, "1.115.2")

    messages = script.check_repository(
        tmp_path,
        finalized_release_version="1.115.3",
    )

    # Both the stale package version and the release version must be named.
    assert any("1.115.2" in message for message in messages)
    assert any("1.115.3" in message for message in messages)
381+
382+
351383
def test_restore_publication_changelog_restores_candidate_snapshot(
352384
tmp_path,
353385
monkeypatch,

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)