diff --git a/.github/scripts/check_data_release_version.py b/.github/scripts/check_data_release_version.py index f90a30229..0a254e4b7 100644 --- a/.github/scripts/check_data_release_version.py +++ b/.github/scripts/check_data_release_version.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +from dataclasses import dataclass import json import os from pathlib import Path @@ -19,6 +20,17 @@ ) VERSION_RE = re.compile(r'^version\s*=\s*"([^"]+)"', re.MULTILINE) SEMVER_RE = re.compile(r"^(\d+)\.(\d+)\.(\d+)(?:rc\d+)?$") +CURRENT = "current" +BEHIND = "behind" +AHEAD = "ahead" +UNKNOWN = "unknown" + + +@dataclass(frozen=True) +class ReleaseVersionState: + package_version: str + finalized_release_version: str + release_version_relation: str def stable_version_tuple(version: str) -> tuple[int, int, int]: @@ -28,7 +40,22 @@ def stable_version_tuple(version: str) -> tuple[int, int, int]: return tuple(int(part) for part in match.groups()) -def pyproject_version(root: Path = REPO_ROOT) -> str: +def release_version_relation( + *, + package_version: str, + finalized_release_version: str, +) -> str: + package_tuple = stable_version_tuple(package_version) + finalized_tuple = stable_version_tuple(finalized_release_version) + if package_tuple < finalized_tuple: + return BEHIND + if package_tuple > finalized_tuple: + return AHEAD + return CURRENT + + +def pyproject_version(root: Path | None = None) -> str: + root = root or REPO_ROOT text = (root / "pyproject.toml").read_text() match = VERSION_RE.search(text) if not match: @@ -58,8 +85,12 @@ def version_violations( package_version: str, finalized_release_version: str, ) -> list[str]: - if stable_version_tuple(package_version) >= stable_version_tuple( - finalized_release_version + if ( + release_version_relation( + package_version=package_version, + finalized_release_version=finalized_release_version, + ) + != BEHIND ): return [] return [ @@ -70,22 +101,55 @@ def version_violations( ] -def check_repository( - root: Path = REPO_ROOT, +def check_repository_state( + root: Path | None = None, *, finalized_release_version: str | None = None, version_manifest_url: str = DEFAULT_VERSION_MANIFEST_URL, -) -> list[str]: +) -> ReleaseVersionState: + root = root or REPO_ROOT package_version = pyproject_version(root) finalized_release_version = finalized_release_version or latest_hf_release_version( version_manifest_url ) - return version_violations( + relation = release_version_relation( + package_version=package_version, + finalized_release_version=finalized_release_version, + ) + return ReleaseVersionState( package_version=package_version, finalized_release_version=finalized_release_version, + release_version_relation=relation, ) +def check_repository( + root: Path | None = None, + *, + finalized_release_version: str | None = None, + version_manifest_url: str = DEFAULT_VERSION_MANIFEST_URL, +) -> list[str]: + state = check_repository_state( + root, + finalized_release_version=finalized_release_version, + version_manifest_url=version_manifest_url, + ) + return version_violations( + package_version=state.package_version, + finalized_release_version=state.finalized_release_version, + ) + + +def write_github_outputs(state: ReleaseVersionState) -> None: + output_path = os.environ.get("GITHUB_OUTPUT") + if not output_path: + return + with Path(output_path).open("a") as output: + output.write(f"package_version={state.package_version}\n") + output.write(f"finalized_release_version={state.finalized_release_version}\n") + output.write(f"release_version_relation={state.release_version_relation}\n") + + def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -103,17 +167,58 @@ def main(argv: list[str] | None = None) -> int: args = parser.parse_args(argv) try: - violations = check_repository( - version_manifest_url=args.version_manifest_url, + package_version = pyproject_version() + stable_version_tuple(package_version) + except (OSError, ValueError) as exc: + write_github_outputs( + ReleaseVersionState( + package_version="", + finalized_release_version="", + release_version_relation=UNKNOWN, + ) + ) + print(f"Could not read data package version: {exc}", file=sys.stderr) + return 1 + + try: + finalized_release_version = latest_hf_release_version(args.version_manifest_url) + state = ReleaseVersionState( + package_version=package_version, + finalized_release_version=finalized_release_version, + release_version_relation=release_version_relation( + package_version=package_version, + finalized_release_version=finalized_release_version, + ), ) except (URLError, OSError, ValueError) as exc: + write_github_outputs( + ReleaseVersionState( + package_version=package_version, + finalized_release_version="", + release_version_relation=UNKNOWN, + ) + ) print( f"Could not check finalized HF data release version: {exc}", file=sys.stderr ) return 1 if args.mode == "fail" else 0 + write_github_outputs(state) + violations = version_violations( + package_version=state.package_version, + finalized_release_version=state.finalized_release_version, + ) if not violations: - print("Data package version is current with the latest finalized HF release.") + if state.release_version_relation == AHEAD: + print( + "Data package version " + f"{state.package_version} is ahead of finalized HF release " + f"{state.finalized_release_version}." + ) + else: + print( + "Data package version is current with the latest finalized HF release." + ) return 0 for violation in violations: diff --git a/.github/scripts/promote_publication_pipeline.py b/.github/scripts/promote_publication_pipeline.py index c6637db6e..c149a7039 100644 --- a/.github/scripts/promote_publication_pipeline.py +++ b/.github/scripts/promote_publication_pipeline.py @@ -5,7 +5,6 @@ import json import os import sys -import tomllib from pathlib import Path import modal @@ -17,15 +16,9 @@ from policyengine_us_data.utils.run_context import ( # noqa: E402 RunContext, release_version_from_bump, - stable_release_version, ) -def _current_package_version() -> str: - with (_REPO_ROOT / "pyproject.toml").open("rb") as file: - return stable_release_version(tomllib.load(file)["project"]["version"]) - - def _modal_function(app_name: str, function_name: str, environment_name: str): if environment_name: return modal.Function.from_name( @@ -61,8 +54,13 @@ def _promotion_context_from_status(context: RunContext, status: dict) -> RunCont raise RuntimeError("Run manifest is missing release_bump.") release_version = _manifest_field(manifest, "release_version") if not release_version: + if not base_release_version: + raise RuntimeError( + "Run manifest is missing base_release_version, so promotion " + "cannot reconstruct release_version from release_bump." + ) release_version = release_version_from_bump( - _current_package_version(), + base_release_version, release_bump, ) return RunContext.from_mapping( diff --git a/.github/scripts/sync_finalized_data_release_version.py b/.github/scripts/sync_finalized_data_release_version.py new file mode 100644 index 000000000..48a6fd819 --- /dev/null +++ b/.github/scripts/sync_finalized_data_release_version.py @@ -0,0 +1,104 @@ +"""Synchronize pyproject.toml with the latest finalized HF data release.""" + +from __future__ import annotations + +import argparse +import os +from pathlib import Path +import re +import sys +from urllib.error import URLError + +_SCRIPT_DIR = Path(__file__).resolve().parent +if str(_SCRIPT_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPT_DIR)) + +from check_data_release_version import ( + BEHIND, + DEFAULT_VERSION_MANIFEST_URL, + REPO_ROOT, + check_repository_state, +) + + +VERSION_RE = re.compile(r'^(version\s*=\s*)"([^"]+)"', re.MULTILINE) + + +def update_pyproject_version(pyproject: Path, release_version: str) -> str: + text = pyproject.read_text() + match = VERSION_RE.search(text) + if not match: + raise ValueError("Could not find project version in pyproject.toml") + + current_version = match.group(2) + if current_version == release_version: + return current_version + + updated = VERSION_RE.sub(rf'\1"{release_version}"', text, count=1) + pyproject.write_text(updated) + return current_version + + +def sync_finalized_data_release_version( + root: Path | None = None, + *, + finalized_release_version: str | None = None, + version_manifest_url: str = DEFAULT_VERSION_MANIFEST_URL, +) -> bool: + root = root or REPO_ROOT + state = check_repository_state( + root, + finalized_release_version=finalized_release_version, + version_manifest_url=version_manifest_url, + ) + if state.release_version_relation != BEHIND: + print( + "No finalized data release version sync needed: " + f"package={state.package_version}, " + f"finalized={state.finalized_release_version}, " + f"relation={state.release_version_relation}." + ) + return False + + previous_version = update_pyproject_version( + root / "pyproject.toml", + state.finalized_release_version, + ) + print( + "Synchronized pyproject.toml with finalized HF data release: " + f"{previous_version} -> {state.finalized_release_version}." + ) + return True + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--version-manifest-url", + default=os.environ.get( + "US_DATA_VERSION_MANIFEST_URL", DEFAULT_VERSION_MANIFEST_URL + ), + ) + parser.add_argument( + "--finalized-release-version", + default=os.environ.get("US_DATA_FINALIZED_RELEASE_VERSION"), + help="Already-resolved finalized HF release version to sync to.", + ) + args = parser.parse_args(argv) + + try: + sync_finalized_data_release_version( + finalized_release_version=args.finalized_release_version, + version_manifest_url=args.version_manifest_url, + ) + except (URLError, OSError, ValueError) as exc: + print( + f"Could not synchronize finalized HF data release version: {exc}", + file=sys.stderr, + ) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml index ab17a5d01..1c8e76cad 100644 --- a/.github/workflows/pipeline.yaml +++ b/.github/workflows/pipeline.yaml @@ -67,11 +67,6 @@ on: description: "Number of Modal workers for parallel matrix build" default: "50" type: string - allow_stale_policyengine_us: - description: "Allow production build when policyengine-us lags the latest PyPI release" - default: false - type: boolean - concurrency: group: pipeline-${{ github.run_id }}-${{ github.run_attempt }} cancel-in-progress: false @@ -103,11 +98,6 @@ jobs: RELEASE_BUMP: ${{ inputs.release_bump || '' }} run: python .github/scripts/resolve_run_context.py - - name: Require current PolicyEngine US dependency - env: - POLICYENGINE_US_ALLOW_STALE: ${{ inputs.allow_stale_policyengine_us }} - run: python .github/scripts/check_policyengine_us_dependency.py --mode fail - - name: Require pyproject.toml to match finalized HF release base run: python .github/scripts/check_data_release_version.py --mode fail diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 08babceb0..06c2cbbde 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -30,27 +30,28 @@ jobs: id: run-context run: python .github/scripts/resolve_run_context.py - policyengine-us-freshness: - name: PolicyEngine US freshness - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: "3.14" - - name: Require current PolicyEngine US dependency - run: python .github/scripts/check_policyengine_us_dependency.py --mode fail - data-release-version: name: Data release version runs-on: ubuntu-latest + outputs: + package_version: ${{ steps.check-data-release-version.outputs.package_version }} + finalized_release_version: ${{ steps.check-data-release-version.outputs.finalized_release_version }} + release_version_relation: ${{ steps.check-data-release-version.outputs.release_version_relation }} steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.14" - name: Require pyproject.toml to match finalized HF release base - run: python .github/scripts/check_data_release_version.py --mode fail + id: check-data-release-version + env: + HEAD_COMMIT_MESSAGE: ${{ github.event.head_commit.message }} + run: | + mode=warn + case "$HEAD_COMMIT_MESSAGE" in + "Update publication candidate"*) mode=fail ;; + esac + python .github/scripts/check_data_release_version.py --mode "$mode" # ── Documentation ────────────────────────────────────────── docs: @@ -90,11 +91,15 @@ jobs: runs-on: ubuntu-latest needs: - run-context - - policyengine-us-freshness - data-release-version if: | !startsWith(github.event.head_commit.message, 'Update publication candidate') && - !startsWith(github.event.head_commit.message, 'Finalize package version') + !startsWith(github.event.head_commit.message, 'Finalize package version') && + ( + needs.data-release-version.outputs.release_version_relation == 'current' || + needs.data-release-version.outputs.release_version_relation == 'ahead' || + needs.data-release-version.outputs.release_version_relation == 'behind' + ) outputs: version_sha: ${{ steps.version-commit.outputs.sha }} steps: @@ -112,6 +117,11 @@ jobs: with: python-version: "3.14" - uses: astral-sh/setup-uv@v8.1.0 + - name: Sync finalized data release version + if: needs.data-release-version.outputs.release_version_relation == 'behind' + run: | + python .github/scripts/sync_finalized_data_release_version.py \ + --finalized-release-version "${{ needs.data-release-version.outputs.finalized_release_version }}" - name: Snapshot candidate changelog fragments env: US_DATA_RUN_ID: ${{ needs.run-context.outputs.run_id }} @@ -137,7 +147,6 @@ jobs: needs: - lint - run-context - - policyengine-us-freshness - data-release-version if: startsWith(github.event.head_commit.message, 'Update publication candidate') permissions: diff --git a/changelog.d/1089.fixed b/changelog.d/1089.fixed new file mode 100644 index 000000000..fbc538199 --- /dev/null +++ b/changelog.d/1089.fixed @@ -0,0 +1 @@ +Self-heal publication candidate generation when the finalized Hugging Face data release is ahead of the local package version, and remove PolicyEngine US PyPI freshness gates from main-push and pipeline runs. diff --git a/tests/unit/test_publication_scripts.py b/tests/unit/test_publication_scripts.py index 4f58750e7..d7df93b0c 100644 --- a/tests/unit/test_publication_scripts.py +++ b/tests/unit/test_publication_scripts.py @@ -380,6 +380,207 @@ def test_data_release_version_check_flags_stale_package(tmp_path): assert any("1.115.3" in violation for violation in violations) +@pytest.mark.parametrize( + ("package_version", "finalized_release_version", "expected_relation"), + [ + ("1.115.3", "1.115.3", "current"), + ("1.115.2", "1.115.3", "behind"), + ("1.115.4", "1.115.3", "ahead"), + ("1.115.3rc1", "1.115.3", "current"), + ], +) +def test_data_release_version_state_relations( + tmp_path, + package_version, + finalized_release_version, + expected_relation, +): + module = _load_script( + ".github/scripts/check_data_release_version.py", + f"check_data_release_version_{expected_relation}_state_test", + ) + _write_pyproject(tmp_path, package_version) + + state = module.check_repository_state( + tmp_path, + finalized_release_version=finalized_release_version, + ) + + assert state.package_version == package_version + assert state.finalized_release_version == finalized_release_version + assert state.release_version_relation == expected_relation + + +def test_data_release_version_check_emits_github_outputs( + tmp_path, + monkeypatch, +): + module = _load_script( + ".github/scripts/check_data_release_version.py", + "check_data_release_version_outputs_test", + ) + _write_pyproject(tmp_path, "1.115.2") + github_output = tmp_path / "github_output" + monkeypatch.setattr(module, "REPO_ROOT", tmp_path) + monkeypatch.setenv("GITHUB_OUTPUT", str(github_output)) + monkeypatch.setattr(module, "latest_hf_release_version", lambda url: "1.115.3") + + assert module.main(["--mode", "warn"]) == 0 + + outputs = dict( + line.split("=", 1) for line in github_output.read_text().splitlines() + ) + assert outputs == { + "package_version": "1.115.2", + "finalized_release_version": "1.115.3", + "release_version_relation": "behind", + } + + +def test_data_release_version_check_emits_unknown_on_manifest_error( + tmp_path, + monkeypatch, + capsys, +): + module = _load_script( + ".github/scripts/check_data_release_version.py", + "check_data_release_version_unknown_outputs_test", + ) + _write_pyproject(tmp_path, "1.115.2") + github_output = tmp_path / "github_output" + monkeypatch.setattr(module, "REPO_ROOT", tmp_path) + monkeypatch.setenv("GITHUB_OUTPUT", str(github_output)) + monkeypatch.setattr( + module, + "latest_hf_release_version", + lambda url: (_ for _ in ()).throw(OSError("manifest unavailable")), + ) + + assert module.main(["--mode", "warn"]) == 0 + + outputs = dict( + line.split("=", 1) for line in github_output.read_text().splitlines() + ) + assert outputs == { + "package_version": "1.115.2", + "finalized_release_version": "", + "release_version_relation": "unknown", + } + assert "manifest unavailable" in capsys.readouterr().err + + +def test_data_release_version_check_fails_on_invalid_local_version( + tmp_path, + monkeypatch, + capsys, +): + module = _load_script( + ".github/scripts/check_data_release_version.py", + "check_data_release_version_invalid_local_test", + ) + _write_pyproject(tmp_path, "1.115") + github_output = tmp_path / "github_output" + monkeypatch.setattr(module, "REPO_ROOT", tmp_path) + monkeypatch.setenv("GITHUB_OUTPUT", str(github_output)) + monkeypatch.setattr(module, "latest_hf_release_version", lambda url: "1.115.3") + + assert module.main(["--mode", "warn"]) == 1 + + outputs = dict( + line.split("=", 1) for line in github_output.read_text().splitlines() + ) + assert outputs["release_version_relation"] == "unknown" + assert "Unsupported version format: 1.115" in capsys.readouterr().err + + +def test_sync_finalized_data_release_version_updates_stale_pyproject(tmp_path): + module = _load_script( + ".github/scripts/sync_finalized_data_release_version.py", + "sync_finalized_data_release_version_update_test", + ) + _write_pyproject(tmp_path, "1.115.2") + + changed = module.sync_finalized_data_release_version( + tmp_path, + finalized_release_version="1.115.3", + ) + + assert changed is True + assert 'version = "1.115.3"' in (tmp_path / "pyproject.toml").read_text() + + +def test_sync_finalized_data_release_version_leaves_current_pyproject(tmp_path): + module = _load_script( + ".github/scripts/sync_finalized_data_release_version.py", + "sync_finalized_data_release_version_current_test", + ) + _write_pyproject(tmp_path, "1.115.3") + before = (tmp_path / "pyproject.toml").read_text() + + changed = module.sync_finalized_data_release_version( + tmp_path, + finalized_release_version="1.115.3", + ) + + assert changed is False + assert (tmp_path / "pyproject.toml").read_text() == before + + +def test_sync_finalized_data_release_version_treats_matching_rc_as_current( + tmp_path, +): + module = _load_script( + ".github/scripts/sync_finalized_data_release_version.py", + "sync_finalized_data_release_version_rc_test", + ) + _write_pyproject(tmp_path, "1.115.3rc1") + before = (tmp_path / "pyproject.toml").read_text() + + changed = module.sync_finalized_data_release_version( + tmp_path, + finalized_release_version="1.115.3", + ) + + assert changed is False + assert (tmp_path / "pyproject.toml").read_text() == before + + +def test_sync_then_bump_version_uses_synced_base_release( + tmp_path, + monkeypatch, +): + sync_module = _load_script( + ".github/scripts/sync_finalized_data_release_version.py", + "sync_finalized_data_release_version_workflow_test", + ) + bump_module = _load_script( + ".github/bump_version.py", + "bump_version_after_sync_script_test", + ) + _write_pyproject(tmp_path, "1.115.4") + changelog_dir = tmp_path / "changelog.d" + changelog_dir.mkdir() + (changelog_dir / "123.fixed").write_text("Fixed a thing.\n") + monkeypatch.setattr(bump_module, "_REPO_ROOT", tmp_path) + monkeypatch.setenv("US_DATA_RUN_ID", "run-123") + + assert ( + sync_module.sync_finalized_data_release_version( + tmp_path, + finalized_release_version="1.115.5", + ) + is True + ) + bump_module.main() + + assert ( + json.loads((tmp_path / ".github" / "publication_scope.json").read_text())[ + "base_release_version" + ] + == "1.115.5" + ) + + def test_restore_publication_changelog_restores_candidate_snapshot( tmp_path, monkeypatch, @@ -660,7 +861,7 @@ def from_name(app_name, function_name, **kwargs): ".github/scripts/promote_publication_pipeline.py", "promote_publication_pipeline_script_test", ) - _write_pyproject(tmp_path, "1.73.0") + _write_pyproject(tmp_path, "9.9.9") github_env = tmp_path / "github_env" monkeypatch.setattr(module, "_REPO_ROOT", tmp_path) monkeypatch.setenv("GITHUB_ENV", str(github_env)) @@ -686,6 +887,48 @@ def from_name(app_name, function_name, **kwargs): assert "VERSION_OVERRIDE" not in json.dumps(captured["calls"]) +def test_promote_publication_script_fallback_release_uses_manifest_base( + tmp_path, + monkeypatch, +): + monkeypatch.setitem( + sys.modules, + "modal", + types.SimpleNamespace(Function=types.SimpleNamespace()), + ) + module = _load_script( + ".github/scripts/promote_publication_pipeline.py", + "promote_publication_pipeline_manifest_base_test", + ) + _write_pyproject(tmp_path, "9.9.9") + monkeypatch.setattr(module, "_REPO_ROOT", tmp_path) + context = module.RunContext.from_mapping( + {"run_id": "run-123"}, + modal_app_name="app", + modal_environment="main", + ) + + promoted_context = module._promotion_context_from_status( + context, + { + "run_manifest": { + "run_id": "run-123", + "candidate_version": "1.73.0-minor", + "base_release_version": "1.73.0", + "release_bump": "minor", + "run_context": { + "run_id": "run-123", + "candidate_version": "1.73.0-minor", + "base_release_version": "1.73.0", + "release_bump": "minor", + }, + } + }, + ) + + assert promoted_context.release_version == "1.74.0" + + def test_promote_publication_script_prefers_manifest_release_version( tmp_path, monkeypatch,