Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pull_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
env:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
PYTHONUNBUFFERED: "1"
PE_UK_DATA_OA_CLONES: "1"
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ jobs:
env:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
PYTHONUNBUFFERED: "1"
PE_UK_DATA_OA_CLONES: "1"
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions changelog.d/fix-publication.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce OA cloning in GitHub Actions dataset builds so publication completes on hosted runners.
23 changes: 21 additions & 2 deletions policyengine_uk_data/datasets/create_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@
logging.basicConfig(level=logging.INFO)


def _get_positive_int_env(name: str, default: int) -> int:
    """Read the environment variable ``name`` as an integer of at least 1.

    Args:
        name: Name of the environment variable to look up.
        default: Value returned unchanged (it is not validated) when the
            variable is not set.

    Returns:
        The parsed integer, or ``default`` when the variable is absent.

    Raises:
        ValueError: If the variable is set but cannot be parsed by ``int``,
            or if it parses to a value below 1.
    """
    try:
        text = os.environ[name]
    except KeyError:
        # Variable is not set at all: fall back without validating.
        return default

    try:
        parsed = int(text)
    except ValueError as exc:
        # Re-raise with the variable name so the failing setting is obvious.
        raise ValueError(f"{name} must be an integer, got {text!r}.") from exc

    if parsed >= 1:
        return parsed

    raise ValueError(f"{name} must be >= 1, got {parsed}.")


def main():
"""Create enhanced FRS dataset with rich progress tracking."""
try:
Expand All @@ -27,6 +43,10 @@ def main():
# Use reduced epochs and fidelity for testing
is_testing = os.environ.get("TESTING", "0") == "1"
epochs = 32 if is_testing else 512
oa_clones = _get_positive_int_env(
"PE_UK_DATA_OA_CLONES",
2 if is_testing else 10,
)

progress_tracker = ProcessingProgress()

Expand Down Expand Up @@ -118,8 +138,7 @@ def main():
clone_and_assign,
)

n_clones = 2 if is_testing else 10
frs = clone_and_assign(frs, n_clones=n_clones)
frs = clone_and_assign(frs, n_clones=oa_clones)
update_dataset("Clone and assign OA geography", "completed")

# Uprate dataset
Expand Down
Loading