-
Notifications
You must be signed in to change notification settings - Fork 11
126 lines (119 loc) · 4.4 KB
/
pipeline.yaml
File metadata and controls
126 lines (119 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Workflow display name and trigger. The only trigger declared here is a manual
# dispatch; each key under `inputs` is a form field/API parameter of that dispatch.
name: Run Pipeline

on:
  workflow_dispatch:
    inputs:
      # Calibration hardware, training length, and H5 worker fan-out.
      gpu:
        description: 'GPU type for regional calibration'
        type: string
        default: 'T4'
      national_gpu:
        description: 'GPU type for national calibration'
        type: string
        default: 'T4'
      epochs:
        description: 'Epochs for regional calibration'
        type: string
        default: '1000'
      national_epochs:
        description: 'Epochs for national calibration'
        type: string
        default: '1000'
      num_workers:
        description: 'Number of parallel H5 workers'
        type: string
        default: '50'
      skip_national:
        description: 'Skip national calibration/H5'
        type: boolean
        default: false

      # Run identity, resume, and release/candidate provenance.
      resume_run_id:
        description: 'Resume a failed run by ID (allows mixed provenance)'
        type: string
        default: ''
      candidate_version:
        description: 'Candidate staging scope used for HF staging'
        type: string
        default: ''
      base_release_version:
        description: 'Stable release version current when the candidate was built'
        type: string
        default: ''
      release_bump:
        description: 'Intended SemVer bump for this candidate: major, minor, or patch'
        type: string
        default: ''
      run_id:
        description: 'Run ID to use across GitHub, Modal, and HF staging'
        type: string
        default: ''
      source_sha:
        description: 'Exact policyengine-us-data commit SHA to deploy'
        type: string
        default: ''

      # Calibration-matrix build options (chunking and parallel fan-out).
      chunked_matrix:
        description: 'Build the calibration matrix in chunks (opt-in)'
        type: boolean
        default: false
      chunk_size:
        description: 'Clone-household columns per chunk'
        type: string
        default: '25000'
      parallel_matrix:
        description: 'Fan chunked matrix building across Modal workers'
        type: boolean
        default: false
      num_matrix_workers:
        description: 'Number of Modal workers for parallel matrix build'
        type: string
        default: '50'
# The group name embeds both the run id and the run attempt, and runs already
# in progress in a group are never cancelled.
concurrency:
  cancel-in-progress: false
  group: 'pipeline-${{ github.run_id }}-${{ github.run_attempt }}'
jobs:
  # Single job: checks out the requested commit, installs the Modal runner
  # dependencies, runs the run-context and release-version scripts, then
  # deploys modal_app/pipeline.py and runs the spawn script.
  pipeline:
    runs-on: ubuntu-latest
    # Least privilege for GITHUB_TOKEN: no step below is handed the token
    # explicitly, and actions/checkout only needs to read repository contents.
    permissions:
      contents: read
    env:
      MODAL_ENVIRONMENT: main
      US_DATA_MODAL_APP_PREFIX: us-data
      US_DATA_RUN_ID: ${{ inputs.run_id || '' }}
    steps:
      # Check out the explicitly requested commit, falling back to the
      # commit that triggered the workflow.
      - uses: actions/checkout@v6
        with:
          ref: ${{ inputs.source_sha || github.sha }}
      - uses: actions/setup-python@v6
        with:
          python-version: "3.14"
      - name: Install Modal Runner Deps
        run: pip install modal pandas
      - name: Resolve run context
        id: run-context
        env:
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
        run: python .github/scripts/resolve_run_context.py
      - name: Require pyproject.toml to match finalized HF release base
        run: python .github/scripts/check_data_release_version.py --mode fail
      - name: Deploy and launch pipeline on Modal
        # Dispatch inputs are exposed as environment variables (with the same
        # fallbacks as the input defaults) instead of being interpolated into
        # the shell script text.
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
          PIPELINE_BRANCH: main
          GPU: ${{ inputs.gpu || 'T4' }}
          NATIONAL_GPU: ${{ inputs.national_gpu || 'T4' }}
          EPOCHS: ${{ inputs.epochs || '1000' }}
          NATIONAL_EPOCHS: ${{ inputs.national_epochs || '1000' }}
          NUM_WORKERS: ${{ inputs.num_workers || '50' }}
          SKIP_NATIONAL: ${{ inputs.skip_national || 'false' }}
          RESUME_RUN_ID: ${{ inputs.resume_run_id || '' }}
          CANDIDATE_VERSION: ${{ inputs.candidate_version || '' }}
          BASE_RELEASE_VERSION: ${{ inputs.base_release_version || '' }}
          RELEASE_BUMP: ${{ inputs.release_bump || '' }}
          SOURCE_SHA: ${{ inputs.source_sha || github.sha }}
          CHUNKED_MATRIX: ${{ inputs.chunked_matrix || 'false' }}
          CHUNK_SIZE: ${{ inputs.chunk_size || '25000' }}
          PARALLEL_MATRIX: ${{ inputs.parallel_matrix || 'false' }}
          NUM_MATRIX_WORKERS: ${{ inputs.num_matrix_workers || '50' }}
        # NOTE(review): US_DATA_MODAL_APP_NAME is not defined anywhere in this
        # workflow; presumably resolve_run_context.py exports it (and possibly
        # a resolved US_DATA_RUN_ID) for later steps — confirm.
        run: |
          modal deploy --env="${MODAL_ENVIRONMENT}" --name="${US_DATA_MODAL_APP_NAME}" --tag="${US_DATA_RUN_ID}" modal_app/pipeline.py
          python .github/scripts/spawn_modal_pipeline.py