From 684f860dc718a92ddaf2882a836571e60e9a5df2 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Fri, 23 May 2025 15:03:58 +0100 Subject: [PATCH 1/9] Add version information to data locations Fixes #104 --- .github/workflows/changelog_entry.yaml | 29 +++++++++++++++ .github/workflows/publish_package.yaml | 35 ++++++++++++++++++ .github/workflows/push.yaml | 26 +------------ .github/workflows/versioning.yaml | 37 +++++++++++++++++++ .gitignore | 3 +- .../storage/upload_completed_datasets.py | 22 +++++++++++ 6 files changed, 127 insertions(+), 25 deletions(-) create mode 100644 .github/workflows/changelog_entry.yaml create mode 100644 .github/workflows/publish_package.yaml create mode 100644 .github/workflows/versioning.yaml diff --git a/.github/workflows/changelog_entry.yaml b/.github/workflows/changelog_entry.yaml new file mode 100644 index 000000000..d0eb8574d --- /dev/null +++ b/.github/workflows/changelog_entry.yaml @@ -0,0 +1,29 @@ +name: Versioning + +on: + pull_request: + branches: [ main ] + +jobs: + check-changelog-entry: + name: Changelog entry check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check for changelog entry + run: | + if [ ! -f "changelog_entry.yaml" ]; then + echo "Error: changelog_entry.yaml file is missing." + echo "Please add a changelog_entry.yaml file at the root of the repository." + exit 1 + fi + + # Check if the file is empty + if [ ! -s "changelog_entry.yaml" ]; then + echo "Error: changelog_entry.yaml file is empty." + echo "Please add content to the changelog_entry.yaml file." + exit 1 + fi + + echo "Changelog entry found and is not empty." \ No newline at end of file diff --git a/.github/workflows/publish_package.yaml b/.github/workflows/publish_package.yaml new file mode 100644 index 000000000..6584dcdd9 --- /dev/null +++ b/.github/workflows/publish_package.yaml @@ -0,0 +1,35 @@ +name: Publish package +on: + push: + branches: [ main ] + paths: + - pyproject.toml + +jobs: + Publish: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.11' + - name: Publish a git tag + run: ".github/publish-git-tag.sh || true" + - name: Install package + run: make install + - name: Build package + run: make + - name: Remove .whl files + run: rm dist/*.whl + - name: Publish a Python distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI }} + skip-existing: true + verbose: true diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 71864d9bc..d2a8a5282 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -6,6 +6,8 @@ on: push: branches: - main + paths: + - pyproject.toml jobs: lint: @@ -70,27 +72,3 @@ jobs: with: branch: gh-pages folder: docs/_build/html - publish-to-pypi: - name: Publish to PyPI - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all tags and branches - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.12 - - name: Install package - run: make install - - name: Build package - run: python -m build - - name: Publish a git tag - run: ".github/publish-git-tag.sh || true" - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI }} - skip-existing: true diff --git a/.github/workflows/versioning.yaml b/.github/workflows/versioning.yaml new file mode 100644 index 000000000..b4e0edb9e --- /dev/null +++ b/.github/workflows/versioning.yaml @@ -0,0 +1,37 @@ +# Workflow that runs on versioning metadata updates. + +name: Versioning updates +on: + push: + branches: + - main + + paths: + - changelog_entry.yaml + - "!pyproject.toml" + +jobs: + Versioning: + runs-on: ubuntu-latest + if: | + (!(github.event.head_commit.message == 'Update package version')) + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + token: ${{ secrets.POLICYENGINE_GITHUB }} + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.12 + - name: Build changelog + run: pip install yaml-changelog && make changelog + - name: Preview changelog update + run: ".github/get-changelog-diff.sh" + - name: Update changelog + uses: EndBug/add-and-commit@v9 + with: + add: "." + message: Update package version \ No newline at end of file diff --git a/.gitignore b/.gitignore index 52b67116f..9d52f6ca9 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ !incomes_projection.csv !policyengine_uk_data/datasets/frs/local_areas/**/*.csv **/_build -!policyengine_uk_data/storage/*.csv \ No newline at end of file +!policyengine_uk_data/storage/*.csv +**/version.json diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 21f61fe1d..46f9428b4 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -3,6 +3,9 @@ from policyengine_uk_data.utils.huggingface import upload from google.cloud import storage import google.auth +import os +from importlib import metadata +import json def upload_datasets(): @@ -11,6 +14,14 @@ def upload_datasets(): credentials=credentials, project=project_id ) bucket = storage_client.bucket("policyengine-uk-data-private") + + # Upload versions + + with open(STORAGE_FOLDER / "version.json", "w") as f: + f.write( + json.dumps({"version": metadata.version("policyengine-uk-data")}) + ) + for dataset in [FRS_2022_23, EnhancedFRS_2022_23]: dataset = dataset() if not dataset.exists: @@ -62,6 +73,17 @@ def upload_datasets(): f"Uploaded local_authority_weights.h5 to GCS bucket policyengine-uk-data-private." ) + upload( + STORAGE_FOLDER / "version.json", + "policyengine/policyengine-uk-data", + "version.json", + ) + + blob = "version.json" + blob = bucket.blob(blob) + blob.upload_from_filename(STORAGE_FOLDER / "version.json") + print(f"Uploaded version.json to GCS bucket policyengine-uk-data-private.") + if __name__ == "__main__": upload_datasets() From 664cc934ef6e70e9050cbdf706e74ae87329cc63 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 11:58:14 +0100 Subject: [PATCH 2/9] Set metadata in versioning --- .../storage/upload_completed_datasets.py | 88 +++---------------- policyengine_uk_data/utils/data_upload.py | 83 +++++++++++++++++ 2 files changed, 96 insertions(+), 75 deletions(-) create mode 100644 policyengine_uk_data/utils/data_upload.py diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 46f9428b4..59d93fdfa 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -1,89 +1,27 @@ from policyengine_uk_data.datasets import EnhancedFRS_2022_23, FRS_2022_23 from policyengine_uk_data.storage import STORAGE_FOLDER -from policyengine_uk_data.utils.huggingface import upload -from google.cloud import storage -import google.auth -import os -from importlib import metadata -import json +from policyengine_uk_data.utils.data_upload import upload_data_files def upload_datasets(): - credentials, project_id = google.auth.default() - storage_client = storage.Client( - credentials=credentials, project=project_id - ) - bucket = storage_client.bucket("policyengine-uk-data-private") - - # Upload versions - - with open(STORAGE_FOLDER / "version.json", "w") as f: - f.write( - json.dumps({"version": metadata.version("policyengine-uk-data")}) - ) - - for dataset in [FRS_2022_23, EnhancedFRS_2022_23]: - dataset = dataset() - if not dataset.exists: - raise ValueError( - f"Dataset {dataset.name} does not exist at {dataset.file_path}." - ) - - upload( - dataset.file_path, - "policyengine/policyengine-uk-data", - dataset.file_path.name, - ) - blob = dataset.file_path.name - blob = bucket.blob(blob) - blob.upload_from_filename(dataset.file_path) - print( - f"Uploaded {dataset.file_path.name} to GCS bucket policyengine-uk-data-private." - ) - - # Constituency weights: - - upload( + dataset_files = [ + FRS_2022_23.file_path, + EnhancedFRS_2022_23.file_path, STORAGE_FOLDER / "parliamentary_constituency_weights.h5", - "policyengine/policyengine-uk-data", - "parliamentary_constituency_weights.h5", - ) - - blob = "parliamentary_constituency_weights.h5" - blob = bucket.blob(blob) - blob.upload_from_filename( - STORAGE_FOLDER / "parliamentary_constituency_weights.h5" - ) - print( - f"Uploaded parliamentary_constituency_weights.h5 to GCS bucket policyengine-uk-data-private." - ) - - # Local authority weights: - - upload( STORAGE_FOLDER / "local_authority_weights.h5", - "policyengine/policyengine-uk-data", - "local_authority_weights.h5", - ) + ] - blob = "local_authority_weights.h5" - blob = bucket.blob(blob) - blob.upload_from_filename(STORAGE_FOLDER / "local_authority_weights.h5") - print( - f"Uploaded local_authority_weights.h5 to GCS bucket policyengine-uk-data-private." - ) + for file_path in dataset_files: + if not file_path.exists(): + raise ValueError(f"File {file_path} does not exist.") - upload( - STORAGE_FOLDER / "version.json", - "policyengine/policyengine-uk-data", - "version.json", + upload_data_files( + files=dataset_files, + hf_repo_name="policyengine-uk-data/datasets", + hf_repo_type="dataset", + gcs_bucket_name="policyengine-uk-data", ) - blob = "version.json" - blob = bucket.blob(blob) - blob.upload_from_filename(STORAGE_FOLDER / "version.json") - print(f"Uploaded version.json to GCS bucket policyengine-uk-data-private.") - if __name__ == "__main__": upload_datasets() diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py new file mode 100644 index 000000000..0b1230de0 --- /dev/null +++ b/policyengine_uk_data/utils/data_upload.py @@ -0,0 +1,83 @@ +from typing import List +from huggingface_hub import HfApi, CommitOperationAdd +from huggingface_hub.errors import RevisionNotFoundError +from google.cloud import storage +from pathlib import Path +from importlib import metadata +import google.auth + + +def upload_data_files( + files: List[str], + gcs_bucket_name: str = "policyengine-uk-data-private", + hf_repo_name: str = "policyengine/policyengine-uk-data", + hf_repo_type: str = "model", +): + version = metadata.version("policyengine-uk-data") + + api = HfApi() + hf_operations = [] + + for file_path in files: + file_path = Path(file_path) + if not file_path.exists(): + raise ValueError(f"File {file_path} does not exist.") + hf_operations.append( + CommitOperationAdd( + path_in_repo=file_path.name, + path_or_fileobj=str(file_path), + ) + ) + commit_info = api.create_commit( + repo_id=hf_repo_name, + operations=hf_operations, + repo_type=hf_repo_type, + commit_message=f"Upload data files for version {version}", + ) + print(f"Uploaded files to Hugging Face repository {hf_repo_name}.") + # Tag commit with version + tag_name = version + + # Delete the tag if it already exists to ensure the new commit is tagged. + # missing_ok=True ensures that if the tag doesn't exist, no error is raised. + + try: + api.delete_tag( + repo_id=hf_repo_name, + tag=tag_name, + repo_type=hf_repo_type, + ) + print(f"Tag {version} already exists: deleting the old tag.") + except RevisionNotFoundError: + pass + + # Create the new tag + api.create_tag( + repo_id=hf_repo_name, + tag=tag_name, + revision=commit_info.oid, + repo_type=hf_repo_type, + ) + print( + f"Tagged commit with {tag_name} in Hugging Face repository {hf_repo_name}." + ) + + # Upload to GCS + credentials, project_id = google.auth.default() + storage_client = storage.Client( + credentials=credentials, project=project_id + ) + bucket = storage_client.bucket(gcs_bucket_name) + for file_path in files: + file_path = Path(file_path) + blob = bucket.blob(file_path.name) + blob.metadata = {"version": version} + blob.upload_from_filename(file_path) + print(f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}.") + + # Set metadata + blob.metadata = {"version": version} + blob.patch() + print( + f"Set metadata for {file_path.name} in GCS bucket {gcs_bucket_name}." + ) From b0481c60ca536d372a395e9e260177c84447e2e2 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 17:29:49 +0100 Subject: [PATCH 3/9] Address comments --- policyengine_uk_data/utils/data_upload.py | 34 ++++++++--------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py index 0b1230de0..d03b76b81 100644 --- a/policyengine_uk_data/utils/data_upload.py +++ b/policyengine_uk_data/utils/data_upload.py @@ -5,6 +5,7 @@ from pathlib import Path from importlib import metadata import google.auth +import logging def upload_data_files( @@ -12,8 +13,10 @@ def upload_data_files( gcs_bucket_name: str = "policyengine-uk-data-private", hf_repo_name: str = "policyengine/policyengine-uk-data", hf_repo_type: str = "model", + version: str = None, ): - version = metadata.version("policyengine-uk-data") + if version is None: + version = metadata.version("policyengine-uk-data") api = HfApi() hf_operations = [] @@ -34,32 +37,18 @@ def upload_data_files( repo_type=hf_repo_type, commit_message=f"Upload data files for version {version}", ) - print(f"Uploaded files to Hugging Face repository {hf_repo_name}.") + logging.info(f"Uploaded files to Hugging Face repository {hf_repo_name}.") # Tag commit with version - tag_name = version - - # Delete the tag if it already exists to ensure the new commit is tagged. - # missing_ok=True ensures that if the tag doesn't exist, no error is raised. - - try: - api.delete_tag( - repo_id=hf_repo_name, - tag=tag_name, - repo_type=hf_repo_type, - ) - print(f"Tag {version} already exists: deleting the old tag.") - except RevisionNotFoundError: - pass # Create the new tag api.create_tag( repo_id=hf_repo_name, - tag=tag_name, + tag=version, revision=commit_info.oid, repo_type=hf_repo_type, ) - print( - f"Tagged commit with {tag_name} in Hugging Face repository {hf_repo_name}." + logging.info( + f"Tagged commit with {version} in Hugging Face repository {hf_repo_name}." ) # Upload to GCS @@ -71,13 +60,14 @@ def upload_data_files( for file_path in files: file_path = Path(file_path) blob = bucket.blob(file_path.name) - blob.metadata = {"version": version} blob.upload_from_filename(file_path) - print(f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}.") + logging.info( + f"Uploaded {file_path.name} to GCS bucket {gcs_bucket_name}." + ) # Set metadata blob.metadata = {"version": version} blob.patch() - print( + logging.info( f"Set metadata for {file_path.name} in GCS bucket {gcs_bucket_name}." ) From c620229ce9efbc0db0c24256883dfa68f7938b61 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 17:35:53 +0100 Subject: [PATCH 4/9] Refactor to split up HF and GCS --- policyengine_uk_data/utils/data_upload.py | 37 +++++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py index d03b76b81..dc6f98005 100644 --- a/policyengine_uk_data/utils/data_upload.py +++ b/policyengine_uk_data/utils/data_upload.py @@ -18,6 +18,29 @@ def upload_data_files( if version is None: version = metadata.version("policyengine-uk-data") + upload_files_to_hf( + files=files, + version=version, + hf_repo_name=hf_repo_name, + hf_repo_type=hf_repo_type, + ) + + upload_files_to_gcs( + files=files, + version=version, + gcs_bucket_name=gcs_bucket_name, + ) + + +def upload_files_to_hf( + files: List[str], + version: str, + hf_repo_name: str = "policyengine/policyengine-uk-data", + hf_repo_type: str = "model", +): + """ + Upload files to Hugging Face repository and tag the commit with the version. + """ api = HfApi() hf_operations = [] @@ -38,9 +61,8 @@ def upload_data_files( commit_message=f"Upload data files for version {version}", ) logging.info(f"Uploaded files to Hugging Face repository {hf_repo_name}.") - # Tag commit with version - # Create the new tag + # Tag commit with version api.create_tag( repo_id=hf_repo_name, tag=version, @@ -51,12 +73,21 @@ def upload_data_files( f"Tagged commit with {version} in Hugging Face repository {hf_repo_name}." ) - # Upload to GCS + +def upload_files_to_gcs( + files: List[str], + version: str, + gcs_bucket_name: str = "policyengine-uk-data-private", +): + """ + Upload files to Google Cloud Storage and set metadata with the version. + """ credentials, project_id = google.auth.default() storage_client = storage.Client( credentials=credentials, project=project_id ) bucket = storage_client.bucket(gcs_bucket_name) + for file_path in files: file_path = Path(file_path) blob = bucket.blob(file_path.name) From 4d218aae7009fcb25a314a50b704c523f96b2cb3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 21:50:13 +0100 Subject: [PATCH 5/9] Only publish after tests run --- .github/workflows/publish_package.yaml | 35 -------------------------- .github/workflows/push.yaml | 13 ++++++++++ 2 files changed, 13 insertions(+), 35 deletions(-) delete mode 100644 .github/workflows/publish_package.yaml diff --git a/.github/workflows/publish_package.yaml b/.github/workflows/publish_package.yaml deleted file mode 100644 index 6584dcdd9..000000000 --- a/.github/workflows/publish_package.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: Publish package -on: - push: - branches: [ main ] - paths: - - pyproject.toml - -jobs: - Publish: - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v2 - - name: Install uv - uses: astral-sh/setup-uv@v5 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.11' - - name: Publish a git tag - run: ".github/publish-git-tag.sh || true" - - name: Install package - run: make install - - name: Build package - run: make - - name: Remove .whl files - run: rm dist/*.whl - - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI }} - skip-existing: true - verbose: true diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index d2a8a5282..553766939 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -72,3 +72,16 @@ jobs: with: branch: gh-pages folder: docs/_build/html + - name: Build package + run: make + - name: Publish a git tag + run: ".github/publish-git-tag.sh || true" + - name: Remove .whl files + run: rm dist/*.whl + - name: Publish a Python distribution to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI }} + skip-existing: true + verbose: true From 265288801e496c8d55be8795da2069b4f1171f91 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 21:51:19 +0100 Subject: [PATCH 6/9] Remove redundant make step --- .github/workflows/push.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 553766939..7bd46d26d 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -72,8 +72,6 @@ jobs: with: branch: gh-pages folder: docs/_build/html - - name: Build package - run: make - name: Publish a git tag run: ".github/publish-git-tag.sh || true" - name: Remove .whl files From 062cbab18c789b399cd9f743db8f45ab1d634705 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 22:00:13 +0100 Subject: [PATCH 7/9] Fix typo in HF repo name --- policyengine_uk_data/storage/upload_completed_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 59d93fdfa..9c3b06262 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -17,7 +17,7 @@ def upload_datasets(): upload_data_files( files=dataset_files, - hf_repo_name="policyengine-uk-data/datasets", + hf_repo_name="policyengine-uk-data/policyengine-uk-data-private", hf_repo_type="dataset", gcs_bucket_name="policyengine-uk-data", ) From c3298953403c2645c7b89d2e7c76e121e74a3294 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 22:01:06 +0100 Subject: [PATCH 8/9] Fix typos in names --- policyengine_uk_data/storage/upload_completed_datasets.py | 2 +- policyengine_uk_data/utils/data_upload.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 9c3b06262..5a492c9a3 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -19,7 +19,7 @@ def upload_datasets(): files=dataset_files, hf_repo_name="policyengine-uk-data/policyengine-uk-data-private", hf_repo_type="dataset", - gcs_bucket_name="policyengine-uk-data", + gcs_bucket_name="policyengine-uk-data-private", ) diff --git a/policyengine_uk_data/utils/data_upload.py b/policyengine_uk_data/utils/data_upload.py index dc6f98005..42d0fec24 100644 --- a/policyengine_uk_data/utils/data_upload.py +++ b/policyengine_uk_data/utils/data_upload.py @@ -35,7 +35,7 @@ def upload_data_files( def upload_files_to_hf( files: List[str], version: str, - hf_repo_name: str = "policyengine/policyengine-uk-data", + hf_repo_name: str = "policyengine/policyengine-uk-data-private", hf_repo_type: str = "model", ): """ From d5da6604e2d1ff66f5ace44c6ff075d38fac73c6 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 26 May 2025 22:30:52 +0100 Subject: [PATCH 9/9] Catch super tiny bug --- policyengine_uk_data/storage/upload_completed_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 5a492c9a3..3eb02385c 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -18,7 +18,7 @@ def upload_datasets(): upload_data_files( files=dataset_files, hf_repo_name="policyengine-uk-data/policyengine-uk-data-private", - hf_repo_type="dataset", + hf_repo_type="model", gcs_bucket_name="policyengine-uk-data-private", )