# pipeline.yaml
# Manually dispatched workflow: checks out the requested commit, runs the
# pre-flight check scripts, deploys modal_app/pipeline.py with the Modal CLI,
# and then runs .github/scripts/spawn_modal_pipeline.py.
name: Run Pipeline

on:
  workflow_dispatch:
    inputs:
      # --- Calibration settings -------------------------------------------
      gpu:
        description: "GPU type for regional calibration"
        default: "T4"
        type: string
      national_gpu:
        description: "GPU type for national calibration"
        default: "T4"
        type: string
      epochs:
        description: "Epochs for regional calibration"
        default: "1000"
        type: string
      national_epochs:
        description: "Epochs for national calibration"
        default: "1000"
        type: string
      num_workers:
        description: "Number of parallel H5 workers"
        default: "50"
        type: string
      skip_national:
        description: "Skip national calibration/H5"
        default: false
        type: boolean

      # --- Run identity, resume, and release provenance -------------------
      resume_run_id:
        description: "Resume a failed run by ID (allows mixed provenance)"
        default: ""
        type: string
      candidate_version:
        description: "Candidate staging scope used for HF staging"
        default: ""
        type: string
      base_release_version:
        description: "Stable release version current when the candidate was built"
        default: ""
        type: string
      release_bump:
        description: "Intended SemVer bump for this candidate: major, minor, or patch"
        default: ""
        type: string
      run_id:
        description: "Run ID to use across GitHub, Modal, and HF staging"
        default: ""
        type: string
      source_sha:
        description: "Exact policyengine-us-data commit SHA to deploy"
        default: ""
        type: string

      # --- Calibration matrix build options -------------------------------
      chunked_matrix:
        description: "Build the calibration matrix in chunks (opt-in)"
        default: false
        type: boolean
      chunk_size:
        description: "Clone-household columns per chunk"
        default: "25000"
        type: string
      parallel_matrix:
        description: "Fan chunked matrix building across Modal workers"
        default: false
        type: boolean
      num_matrix_workers:
        description: "Number of Modal workers for parallel matrix build"
        default: "50"
        type: string

      # --- Dependency guard override --------------------------------------
      allow_stale_policyengine_us:
        description: "Allow production build when policyengine-us lags the latest PyPI release"
        default: false
        type: boolean

# The group name embeds the run ID and the attempt number, and in-progress
# runs are never cancelled.
concurrency:
  group: pipeline-${{ github.run_id }}-${{ github.run_attempt }}
  cancel-in-progress: false

jobs:
  pipeline:
    runs-on: ubuntu-latest

    # Job-level environment, visible to every step below.
    env:
      MODAL_ENVIRONMENT: main
      US_DATA_MODAL_APP_PREFIX: us-data
      US_DATA_RUN_ID: ${{ inputs.run_id || '' }}

    steps:
      # Check out the explicitly requested commit, falling back to the commit
      # that triggered the workflow when no source_sha input is given.
      - uses: actions/checkout@v6
        with:
          ref: ${{ inputs.source_sha || github.sha }}

      # Version is quoted so YAML keeps it a string rather than a float.
      - uses: actions/setup-python@v6
        with:
          python-version: "3.14"

      - name: Install Modal Runner Deps
        run: pip install modal pandas

      # Receives the release-related inputs through the environment.
      - name: Resolve run context
        id: run-context
        env:
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
        run: python .github/scripts/resolve_run_context.py

      # Pre-flight check, invoked with --mode fail; the allow-stale input is
      # handed to the script through POLICYENGINE_US_ALLOW_STALE.
      - name: Require current PolicyEngine US dependency
        env:
          POLICYENGINE_US_ALLOW_STALE: ${{ inputs.allow_stale_policyengine_us }}
        run: python .github/scripts/check_policyengine_us_dependency.py --mode fail

      # Pre-flight check, invoked with --mode fail.
      - name: Require pyproject.toml to match finalized HF release base
        run: python .github/scripts/check_data_release_version.py --mode fail

      # Workflow inputs reach the shell only as environment variables; none
      # are interpolated directly into the script text. Each fallback after
      # `||` repeats the default declared for that input above.
      - name: Deploy and launch pipeline on Modal
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
          PIPELINE_BRANCH: main
          GPU: ${{ inputs.gpu || 'T4' }}
          NATIONAL_GPU: ${{ inputs.national_gpu || 'T4' }}
          EPOCHS: ${{ inputs.epochs || '1000' }}
          NATIONAL_EPOCHS: ${{ inputs.national_epochs || '1000' }}
          NUM_WORKERS: ${{ inputs.num_workers || '50' }}
          SKIP_NATIONAL: ${{ inputs.skip_national || 'false' }}
          RESUME_RUN_ID: ${{ inputs.resume_run_id || '' }}
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
          SOURCE_SHA: ${{ inputs.source_sha || github.sha }}
          CHUNKED_MATRIX: ${{ inputs.chunked_matrix || 'false' }}
          CHUNK_SIZE: ${{ inputs.chunk_size || '25000' }}
          PARALLEL_MATRIX: ${{ inputs.parallel_matrix || 'false' }}
          NUM_MATRIX_WORKERS: ${{ inputs.num_matrix_workers || '50' }}
        # NOTE(review): US_DATA_MODAL_APP_NAME is not set anywhere in this
        # file — presumably exported by an earlier step (e.g. the run-context
        # script); confirm before relying on it.
        run: |
          modal deploy --env="${MODAL_ENVIRONMENT}" --name="${US_DATA_MODAL_APP_NAME}" --tag="${US_DATA_RUN_ID}" modal_app/pipeline.py
          python .github/scripts/spawn_modal_pipeline.py