From 635356d88b7cd4fb99ec5ab0ad11f0a4e5a6f8ed Mon Sep 17 00:00:00 2001
From: Max Ghenis
Date: Mon, 13 Apr 2026 09:31:57 -0400
Subject: [PATCH 1/2] Add CI heartbeat logs for dataset builds

---
 .github/workflows/pull_request.yaml         |  1 +
 .github/workflows/push.yaml                 |  1 +
 changelog.d/ci-progress-heartbeats.fixed.md |  1 +
 policyengine_uk_data/tests/test_progress.py | 38 +++++++++++++++++
 policyengine_uk_data/utils/progress.py      | 47 ++++++++++++++++++++-
 5 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/ci-progress-heartbeats.fixed.md
 create mode 100644 policyengine_uk_data/tests/test_progress.py

diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml
index 0b8e9d645..3055afa29 100644
--- a/.github/workflows/pull_request.yaml
+++ b/.github/workflows/pull_request.yaml
@@ -32,6 +32,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+      PYTHONUNBUFFERED: "1"
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index 5ca271170..84e774cc9 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -35,6 +35,7 @@ jobs:
       id-token: "write"
     env:
       HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+      PYTHONUNBUFFERED: "1"
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/changelog.d/ci-progress-heartbeats.fixed.md b/changelog.d/ci-progress-heartbeats.fixed.md
new file mode 100644
index 000000000..5d15ff48b
--- /dev/null
+++ b/changelog.d/ci-progress-heartbeats.fixed.md
@@ -0,0 +1 @@
+Make long-running dataset builds emit plain CI heartbeat logs so release workflows are less likely to die silently during calibration.
diff --git a/policyengine_uk_data/tests/test_progress.py b/policyengine_uk_data/tests/test_progress.py
new file mode 100644
index 000000000..25c655ba0
--- /dev/null
+++ b/policyengine_uk_data/tests/test_progress.py
@@ -0,0 +1,38 @@
+from policyengine_uk_data.utils.progress import ProcessingProgress
+
+
+def test_track_dataset_creation_logs_in_ci(monkeypatch, capsys):
+    monkeypatch.setenv("GITHUB_ACTIONS", "true")
+
+    progress = ProcessingProgress()
+
+    with progress.track_dataset_creation(["Build base", "Save final"]) as (
+        update_dataset,
+        nested_progress,
+    ):
+        assert nested_progress is None
+        update_dataset("Build base", "processing")
+        update_dataset("Build base", "completed")
+        update_dataset("Save final", "processing")
+        update_dataset("Save final", "completed")
+
+    output = capsys.readouterr().out
+    assert "[dataset] starting: Build base" in output
+    assert "[dataset] completed (1/2): Build base" in output
+    assert "[dataset] completed (2/2): Save final" in output
+
+
+def test_track_calibration_logs_heartbeats_in_ci(monkeypatch, capsys):
+    monkeypatch.setenv("CI", "true")
+
+    progress = ProcessingProgress()
+
+    with progress.track_calibration(12) as update_calibration:
+        for iteration in range(1, 13):
+            update_calibration(iteration, calculating_loss=True)
+            update_calibration(iteration, loss_value=iteration / 10)
+
+    output = capsys.readouterr().out
+    assert "[calibration] epoch 1/12: calculating loss" in output
+    assert "[calibration] epoch 10/12: loss=1.000000" in output
+    assert "[calibration] epoch 12/12: loss=1.200000" in output
diff --git a/policyengine_uk_data/utils/progress.py b/policyengine_uk_data/utils/progress.py
index e6a70f89d..a6a386f27 100644
--- a/policyengine_uk_data/utils/progress.py
+++ b/policyengine_uk_data/utils/progress.py
@@ -6,8 +6,8 @@
 """
 
 from contextlib import contextmanager
+import os
 from typing import Any, Dict, List, Optional, Union
-import time
 
 from rich.console import Console
 from rich.progress import (
@@ -187,6 +187,13 @@ def __init__(self, console: Optional[Console] = None):
         """
         self.console = console or Console()
         self.progress_manager: Optional[RichProgress] = None
+        self._plain_output = (
+            os.environ.get("GITHUB_ACTIONS") == "true"
+            or os.environ.get("CI") == "true"
+        )
+
+    def _emit(self, message: str):
+        print(message, flush=True)
 
     @contextmanager
     def track_dataset_creation(self, datasets: List[str]):
@@ -198,6 +205,23 @@ def track_dataset_creation(self, datasets: List[str]):
         Yields:
             Tuple of (update_dataset function, progress manager for nested tasks).
         """
+        if self._plain_output:
+            completed_count = 0
+
+            def update_dataset(dataset_name: str, status: str = "processing"):
+                nonlocal completed_count
+
+                if status == "processing":
+                    self._emit(f"[dataset] starting: {dataset_name}")
+                elif status == "completed":
+                    completed_count += 1
+                    self._emit(
+                        f"[dataset] completed ({completed_count}/{len(datasets)}): {dataset_name}"
+                    )
+
+            yield update_dataset, None
+            return
+
         with create_progress(self.console) as progress:
             # Main dataset creation progress
             main_task = progress.add_task(
@@ -265,6 +289,27 @@ def track_calibration(self, iterations: int, nested_progress=None):
         Yields:
             Function to update calibration progress.
         """
+        if self._plain_output:
+            def update_calibration(
+                iteration: int,
+                loss_value: Optional[float] = None,
+                calculating_loss: bool = False,
+            ):
+                if calculating_loss:
+                    self._emit(
+                        f"[calibration] epoch {iteration}/{iterations}: calculating loss"
+                    )
+                elif (
+                    loss_value is not None
+                    and (iteration == 1 or iteration == iterations or iteration % 10 == 0)
+                ):
+                    self._emit(
+                        f"[calibration] epoch {iteration}/{iterations}: loss={loss_value:.6f}"
+                    )
+
+            yield update_calibration
+            return
+
         if nested_progress:
             # Add calibration as a nested task in existing progress
             calibration_task = nested_progress.add_task(

From 475127175a50f75664d6e923ffb9ad487f48c708 Mon Sep 17 00:00:00 2001
From: Max Ghenis
Date: Mon, 13 Apr 2026 09:36:00 -0400
Subject: [PATCH 2/2] Format CI heartbeat progress logging

---
 policyengine_uk_data/utils/progress.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/policyengine_uk_data/utils/progress.py b/policyengine_uk_data/utils/progress.py
index a6a386f27..539cd0657 100644
--- a/policyengine_uk_data/utils/progress.py
+++ b/policyengine_uk_data/utils/progress.py
@@ -188,8 +188,7 @@ def __init__(self, console: Optional[Console] = None):
         self.console = console or Console()
         self.progress_manager: Optional[RichProgress] = None
         self._plain_output = (
-            os.environ.get("GITHUB_ACTIONS") == "true"
-            or os.environ.get("CI") == "true"
+            os.environ.get("GITHUB_ACTIONS") == "true" or os.environ.get("CI") == "true"
         )
 
     def _emit(self, message: str):
@@ -290,6 +289,7 @@ def track_calibration(self, iterations: int, nested_progress=None):
             Function to update calibration progress.
         """
         if self._plain_output:
+
             def update_calibration(
                 iteration: int,
                 loss_value: Optional[float] = None,
@@ -299,9 +299,8 @@ def update_calibration(
                     self._emit(
                         f"[calibration] epoch {iteration}/{iterations}: calculating loss"
                     )
-                elif (
-                    loss_value is not None
-                    and (iteration == 1 or iteration == iterations or iteration % 10 == 0)
+                elif loss_value is not None and (
+                    iteration == 1 or iteration == iterations or iteration % 10 == 0
                 ):
                     self._emit(
                         f"[calibration] epoch {iteration}/{iterations}: loss={loss_value:.6f}"