Skip to content

Commit b181b10

Browse files
Merge pull request #105 from PolicyEngine/nikhilwoodruff/issue104
Add version information to data locations
2 parents 98cdda6 + d5da660 commit b181b10

6 files changed

Lines changed: 192 additions & 72 deletions

File tree

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
name: Versioning
2+
3+
on:
4+
pull_request:
5+
branches: [ main ]
6+
7+
jobs:
8+
check-changelog-entry:
9+
name: Changelog entry check
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v3
13+
14+
- name: Check for changelog entry
15+
run: |
16+
if [ ! -f "changelog_entry.yaml" ]; then
17+
echo "Error: changelog_entry.yaml file is missing."
18+
echo "Please add a changelog_entry.yaml file at the root of the repository."
19+
exit 1
20+
fi
21+
22+
# Check if the file is empty
23+
if [ ! -s "changelog_entry.yaml" ]; then
24+
echo "Error: changelog_entry.yaml file is empty."
25+
echo "Please add content to the changelog_entry.yaml file."
26+
exit 1
27+
fi
28+
29+
echo "Changelog entry found and is not empty."

.github/workflows/push.yaml

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ on:
66
push:
77
branches:
88
- main
9+
paths:
10+
- pyproject.toml
911

1012
jobs:
1113
lint:
@@ -70,27 +72,14 @@ jobs:
7072
with:
7173
branch: gh-pages
7274
folder: docs/_build/html
73-
publish-to-pypi:
74-
name: Publish to PyPI
75-
runs-on: ubuntu-latest
76-
steps:
77-
- name: Checkout code
78-
uses: actions/checkout@v4
79-
with:
80-
fetch-depth: 0 # Fetch all history for all tags and branches
81-
- name: Set up Python
82-
uses: actions/setup-python@v5
83-
with:
84-
python-version: 3.12
85-
- name: Install package
86-
run: make install
87-
- name: Build package
88-
run: python -m build
8975
- name: Publish a git tag
9076
run: ".github/publish-git-tag.sh || true"
91-
- name: Publish to PyPI
77+
- name: Remove .whl files
78+
run: rm dist/*.whl
79+
- name: Publish a Python distribution to PyPI
9280
uses: pypa/gh-action-pypi-publish@release/v1
9381
with:
9482
user: __token__
9583
password: ${{ secrets.PYPI }}
9684
skip-existing: true
85+
verbose: true

.github/workflows/versioning.yaml

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
# Workflow that runs on versioning metadata updates.
2+
3+
name: Versioning updates
4+
on:
5+
push:
6+
branches:
7+
- main
8+
9+
paths:
10+
- changelog_entry.yaml
11+
- "!pyproject.toml"
12+
13+
jobs:
14+
Versioning:
15+
runs-on: ubuntu-latest
16+
if: |
17+
(!(github.event.head_commit.message == 'Update package version'))
18+
steps:
19+
- name: Checkout repo
20+
uses: actions/checkout@v4
21+
with:
22+
repository: ${{ github.event.pull_request.head.repo.full_name }}
23+
ref: ${{ github.event.pull_request.head.ref }}
24+
token: ${{ secrets.POLICYENGINE_GITHUB }}
25+
- name: Setup Python
26+
uses: actions/setup-python@v5
27+
with:
28+
python-version: 3.12
29+
- name: Build changelog
30+
run: pip install yaml-changelog && make changelog
31+
- name: Preview changelog update
32+
run: ".github/get-changelog-diff.sh"
33+
- name: Update changelog
34+
uses: EndBug/add-and-commit@v9
35+
with:
36+
add: "."
37+
message: Update package version

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,5 @@
1414
!incomes_projection.csv
1515
!policyengine_uk_data/datasets/frs/local_areas/**/*.csv
1616
**/_build
17-
!policyengine_uk_data/storage/*.csv
17+
!policyengine_uk_data/storage/*.csv
18+
**/version.json

policyengine_uk_data/storage/upload_completed_datasets.py

Lines changed: 14 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1,65 +1,25 @@
11
from policyengine_uk_data.datasets import EnhancedFRS_2022_23, FRS_2022_23
22
from policyengine_uk_data.storage import STORAGE_FOLDER
3-
from policyengine_uk_data.utils.huggingface import upload
4-
from google.cloud import storage
5-
import google.auth
3+
from policyengine_uk_data.utils.data_upload import upload_data_files
64

75

86
def upload_datasets():
9-
credentials, project_id = google.auth.default()
10-
storage_client = storage.Client(
11-
credentials=credentials, project=project_id
12-
)
13-
bucket = storage_client.bucket("policyengine-uk-data-private")
14-
for dataset in [FRS_2022_23, EnhancedFRS_2022_23]:
15-
dataset = dataset()
16-
if not dataset.exists:
17-
raise ValueError(
18-
f"Dataset {dataset.name} does not exist at {dataset.file_path}."
19-
)
20-
21-
upload(
22-
dataset.file_path,
23-
"policyengine/policyengine-uk-data",
24-
dataset.file_path.name,
25-
)
26-
blob = dataset.file_path.name
27-
blob = bucket.blob(blob)
28-
blob.upload_from_filename(dataset.file_path)
29-
print(
30-
f"Uploaded {dataset.file_path.name} to GCS bucket policyengine-uk-data-private."
31-
)
32-
33-
# Constituency weights:
34-
35-
upload(
7+
dataset_files = [
8+
FRS_2022_23.file_path,
9+
EnhancedFRS_2022_23.file_path,
3610
STORAGE_FOLDER / "parliamentary_constituency_weights.h5",
37-
"policyengine/policyengine-uk-data",
38-
"parliamentary_constituency_weights.h5",
39-
)
40-
41-
blob = "parliamentary_constituency_weights.h5"
42-
blob = bucket.blob(blob)
43-
blob.upload_from_filename(
44-
STORAGE_FOLDER / "parliamentary_constituency_weights.h5"
45-
)
46-
print(
47-
f"Uploaded parliamentary_constituency_weights.h5 to GCS bucket policyengine-uk-data-private."
48-
)
49-
50-
# Local authority weights:
51-
52-
upload(
5311
STORAGE_FOLDER / "local_authority_weights.h5",
54-
"policyengine/policyengine-uk-data",
55-
"local_authority_weights.h5",
56-
)
12+
]
13+
14+
for file_path in dataset_files:
15+
if not file_path.exists():
16+
raise ValueError(f"File {file_path} does not exist.")
5717

58-
blob = "local_authority_weights.h5"
59-
blob = bucket.blob(blob)
60-
blob.upload_from_filename(STORAGE_FOLDER / "local_authority_weights.h5")
61-
print(
62-
f"Uploaded local_authority_weights.h5 to GCS bucket policyengine-uk-data-private."
18+
upload_data_files(
19+
files=dataset_files,
20+
hf_repo_name="policyengine-uk-data/policyengine-uk-data-private",
21+
hf_repo_type="model",
22+
gcs_bucket_name="policyengine-uk-data-private",
6323
)
6424

6525

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,104 @@
1+
from typing import List
2+
from huggingface_hub import HfApi, CommitOperationAdd
3+
from huggingface_hub.errors import RevisionNotFoundError
4+
from google.cloud import storage
5+
from pathlib import Path
6+
from importlib import metadata
7+
import google.auth
8+
import logging
9+
10+
11+
def upload_data_files(
    files: List[str],
    gcs_bucket_name: str = "policyengine-uk-data-private",
    hf_repo_name: str = "policyengine/policyengine-uk-data",
    hf_repo_type: str = "model",
    version: str = None,
):
    """
    Publish data files to Hugging Face first, then to Google Cloud Storage.

    Args:
        files: Paths of the local files to upload.
        gcs_bucket_name: Name of the destination GCS bucket.
        hf_repo_name: Identifier of the destination Hugging Face repository.
        hf_repo_type: Hugging Face repository type, forwarded unchanged.
        version: Version label passed to both uploads. When None, the
            installed version of the "policyengine-uk-data" distribution
            is looked up and used instead.
    """
    # Resolve the label once so both destinations receive the same value.
    release = (
        metadata.version("policyengine-uk-data")
        if version is None
        else version
    )

    # Hugging Face goes first; GCS is only reached if that call returns.
    upload_files_to_hf(
        files=files,
        version=release,
        hf_repo_name=hf_repo_name,
        hf_repo_type=hf_repo_type,
    )
    upload_files_to_gcs(
        files=files,
        version=release,
        gcs_bucket_name=gcs_bucket_name,
    )
33+
34+
35+
def upload_files_to_hf(
    files: List[str],
    version: str,
    hf_repo_name: str = "policyengine/policyengine-uk-data-private",
    hf_repo_type: str = "model",
):
    """
    Commit the given files to a Hugging Face repository and tag that commit.

    Every file is added to a single commit under its base file name, and the
    resulting commit is then tagged with ``version``.

    Args:
        files: Paths of the local files to upload.
        version: Version label used in the commit message and as the tag.
        hf_repo_name: Identifier of the destination repository.
        hf_repo_type: Repository type, forwarded to the Hub API calls.

    Raises:
        ValueError: If an entry in ``files`` does not exist on disk.
    """
    hub = HfApi()
    additions = []

    for entry in files:
        local_path = Path(entry)
        if local_path.exists():
            additions.append(
                CommitOperationAdd(
                    path_in_repo=local_path.name,
                    path_or_fileobj=str(local_path),
                )
            )
        else:
            raise ValueError(f"File {local_path} does not exist.")

    # One commit for the whole batch, so the tag below covers every file.
    commit = hub.create_commit(
        repo_id=hf_repo_name,
        operations=additions,
        repo_type=hf_repo_type,
        commit_message=f"Upload data files for version {version}",
    )
    logging.info(f"Uploaded files to Hugging Face repository {hf_repo_name}.")

    # Pin the tag to the commit that was just created.
    tagged_revision = commit.oid
    hub.create_tag(
        repo_id=hf_repo_name,
        tag=version,
        revision=tagged_revision,
        repo_type=hf_repo_type,
    )
    logging.info(
        f"Tagged commit with {version} in Hugging Face repository {hf_repo_name}."
    )
75+
76+
77+
def upload_files_to_gcs(
    files: List[str],
    version: str,
    gcs_bucket_name: str = "policyengine-uk-data-private",
):
    """
    Upload files to Google Cloud Storage, labelling each object with the version.

    Each file is stored in the bucket under its base file name with custom
    metadata ``{"version": version}``.

    Args:
        files: Paths of the local files to upload.
        version: Version label stored in each object's custom metadata.
        gcs_bucket_name: Name of the destination bucket.
    """
    credentials, project_id = google.auth.default()
    storage_client = storage.Client(
        credentials=credentials, project=project_id
    )
    bucket = storage_client.bucket(gcs_bucket_name)

    for file_path in files:
        file_path = Path(file_path)
        blob = bucket.blob(file_path.name)

        # Attach the version before uploading so it is sent with the upload
        # itself. A separate patch() afterwards costs a second request per
        # file and, if it fails, leaves the object stored without a version.
        blob.metadata = {"version": version}
        blob.upload_from_filename(file_path)
        logging.info(
            f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}."
        )
        logging.info(
            f"Set metadata for {file_path.name} in GCS bucket {gcs_bucket_name}."
        )

0 commit comments

Comments
 (0)