# Run Pipeline
#
# Workflow file for this run (Run Pipeline #286).
# Manually dispatched workflow. Every input carries a default, so a dispatch
# with no arguments is valid; numeric-looking values are deliberately declared
# as strings and quoted so they reach the job unchanged.
name: Run Pipeline
on:
  workflow_dispatch:
    inputs:
      # --- Calibration hardware and training length -----------------------
      gpu:
        description: "GPU type for regional calibration"
        default: "T4"
        type: string
      national_gpu:
        description: "GPU type for national calibration"
        default: "T4"
        type: string
      epochs:
        description: "Epochs for regional calibration"
        default: "1000"
        type: string
      national_epochs:
        description: "Epochs for national calibration"
        default: "1000"
        type: string
      num_workers:
        description: "Number of parallel H5 workers"
        default: "50"
        type: string
      skip_national:
        description: "Skip national calibration/H5"
        default: false
        type: boolean

      # --- Run identity, resumption, and release provenance ---------------
      resume_run_id:
        description: "Resume a failed run by ID (allows mixed provenance)"
        default: ""
        type: string
      candidate_version:
        description: "Candidate staging scope used for HF staging"
        default: ""
        type: string
      base_release_version:
        description: "Stable release version current when the candidate was built"
        default: ""
        type: string
      release_bump:
        description: "Intended SemVer bump for this candidate: major, minor, or patch"
        default: ""
        type: string
      run_id:
        description: "Run ID to use across GitHub, Modal, and HF staging"
        default: ""
        type: string
      source_sha:
        description: "Exact policyengine-us-data commit SHA to deploy"
        default: ""
        type: string

      # --- Calibration matrix construction (all opt-in) -------------------
      chunked_matrix:
        description: "Build the calibration matrix in chunks (opt-in)"
        default: false
        type: boolean
      chunk_size:
        description: "Clone-household columns per chunk"
        default: "25000"
        type: string
      parallel_matrix:
        description: "Fan chunked matrix building across Modal workers"
        default: false
        type: boolean
      num_matrix_workers:
        description: "Number of Modal workers for parallel matrix build"
        default: "50"
        type: string
# The group key embeds both the run ID and the attempt number, so each
# run/attempt lands in its own group. With cancel-in-progress disabled as
# well, this setting never queues or cancels one run in favour of another.
concurrency:
  group: pipeline-${{ github.run_id }}-${{ github.run_attempt }}
  cancel-in-progress: false
jobs:
  # Single job: prepare a Python runner, resolve the run context, verify the
  # release version, then deploy the Modal app and launch the pipeline.
  pipeline:
    runs-on: ubuntu-latest
    # Job-level environment, visible to every step below.
    env:
      MODAL_ENVIRONMENT: main
      US_DATA_MODAL_APP_PREFIX: us-data
      US_DATA_RUN_ID: ${{ inputs.run_id || '' }}
    steps:
      # Check out the requested commit, falling back to the commit that
      # triggered this run when no source_sha input was supplied.
      - uses: actions/checkout@v6
        with:
          ref: ${{ inputs.source_sha || github.sha }}

      - uses: actions/setup-python@v6
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.14"

      - name: Install Modal Runner Deps
        run: pip install modal pandas

      # NOTE(review): US_DATA_MODAL_APP_NAME is consumed by the deploy step
      # but is not defined anywhere in this file. Presumably this script
      # exports it for later steps (e.g. via GITHUB_ENV) - confirm.
      - name: Resolve run context
        id: run-context
        env:
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
        run: python .github/scripts/resolve_run_context.py

      - name: Require pyproject.toml to match finalized HF release base
        run: python .github/scripts/check_data_release_version.py --mode fail

      # Inputs are handed to the scripts as environment variables rather than
      # interpolated into the shell text. Each `|| '<value>'` fallback repeats
      # the default declared for the corresponding workflow_dispatch input.
      - name: Deploy and launch pipeline on Modal
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
          PIPELINE_BRANCH: main
          GPU: ${{ inputs.gpu || 'T4' }}
          NATIONAL_GPU: ${{ inputs.national_gpu || 'T4' }}
          EPOCHS: ${{ inputs.epochs || '1000' }}
          NATIONAL_EPOCHS: ${{ inputs.national_epochs || '1000' }}
          NUM_WORKERS: ${{ inputs.num_workers || '50' }}
          SKIP_NATIONAL: ${{ inputs.skip_national || 'false' }}
          RESUME_RUN_ID: ${{ inputs.resume_run_id || '' }}
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
          SOURCE_SHA: ${{ inputs.source_sha || github.sha }}
          CHUNKED_MATRIX: ${{ inputs.chunked_matrix || 'false' }}
          CHUNK_SIZE: ${{ inputs.chunk_size || '25000' }}
          PARALLEL_MATRIX: ${{ inputs.parallel_matrix || 'false' }}
          NUM_MATRIX_WORKERS: ${{ inputs.num_matrix_workers || '50' }}
        run: |
          modal deploy --env="${MODAL_ENVIRONMENT}" --name="${US_DATA_MODAL_APP_NAME}" --tag="${US_DATA_RUN_ID}" modal_app/pipeline.py
          python .github/scripts/spawn_modal_pipeline.py