Skip to content

Commit 5a0bc0c

Browse files
authored
Merge pull request #855 from PolicyEngine/feat/phase-3c-step-manifests
Add Phase 3c run manifests and publication identity
2 parents 2d85ccd + 060402e commit 5a0bc0c

43 files changed

Lines changed: 4432 additions & 723 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,4 @@ Five workflow files in `.github/workflows/`:
6060
- `pr.yaml` — fork check, lint, uv.lock freshness, towncrier fragment check, unit tests, smoke test, docs build. Integration tests trigger when files in `policyengine_us_data/`, `modal_app/`, or `tests/integration/` change. ~2–3 min for the unit path.
6161
- `push.yaml` — on push to main: either version-bump + PyPI publish (on `Update package version` commits), or a full Modal data build with integration tests (on everything else).
6262
- `pipeline.yaml` — dispatch only, spawns the H5 generation pipeline on Modal with configurable GPU/epochs/workers.
63-
- `local_area_publish.yaml` / `local_area_promote.yaml` — manual dispatch to build/stage and then promote local-area H5 files.
63+
- `local_area_publish.yaml` / `local_area_promote.yaml` — manual dispatch to build/stage local-area H5 files and promote a run-scoped US data release.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env bash
# Dispatch the pipeline workflow for an already-resolved run via the GitHub CLI.
#
# Required environment:
#   US_DATA_RUN_ID          run identifier forwarded as the `run_id` input
#   SOURCE_SHA              commit forwarded as the `source_sha` input
# Optional environment:
#   PIPELINE_WORKFLOW_FILE  workflow to dispatch (default: pipeline.yaml)
#   PIPELINE_WORKFLOW_REF   git ref to dispatch on (default: main)
#   GITHUB_STEP_SUMMARY     when set, a Markdown report is appended to it
set -euo pipefail

workflow_file="${PIPELINE_WORKFLOW_FILE:-pipeline.yaml}"
workflow_ref="${PIPELINE_WORKFLOW_REF:-main}"

# Fail fast, in a fixed order, on the first required variable that is
# missing or empty.
for required_var in US_DATA_RUN_ID SOURCE_SHA; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "${required_var} is required" >&2
    exit 1
  fi
done

dispatch_args=(
  --ref "${workflow_ref}"
  -f "run_id=${US_DATA_RUN_ID}"
  -f "source_sha=${SOURCE_SHA}"
)
gh workflow run "${workflow_file}" "${dispatch_args[@]}"

# The step summary only exists inside GitHub Actions; skip it elsewhere.
if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
  cat >> "${GITHUB_STEP_SUMMARY}" <<EOF
## Pipeline Dispatched

| Field | Value |
|-------|-------|
| Run ID | \`${US_DATA_RUN_ID}\` |
| Source SHA | \`${SOURCE_SHA}\` |
| Workflow | \`${workflow_file}\` |
| Workflow ref | \`${workflow_ref}\` |
EOF
fi
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Promote a completed run-scoped US data pipeline from GitHub Actions."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import os
7+
import sys
8+
from pathlib import Path
9+
10+
import modal
11+
12+
_REPO_ROOT = Path(__file__).resolve().parents[2]
13+
if str(_REPO_ROOT) not in sys.path:
14+
sys.path.insert(0, str(_REPO_ROOT))
15+
16+
from policyengine_us_data.utils.run_context import RunContext # noqa: E402
17+
18+
19+
def _append_summary(result: str, context: RunContext) -> None:
    """Append a Markdown promotion report to the GitHub Actions step summary.

    Does nothing when the ``GITHUB_STEP_SUMMARY`` environment variable is
    unset or empty.

    Args:
        result: Value returned by the remote ``promote_run`` call. It is
            rendered with ``str()`` so that a non-string return value
            cannot raise here, after the promotion itself has already
            completed.
        context: Run context whose ``run_id``, ``modal_app_name``,
            ``modal_environment`` and ``hf_staging_prefix`` attributes are
            reported in the summary table.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        return

    lines = [
        "## Publication Promoted\n\n",
        "| Field | Value |\n",
        "|-------|-------|\n",
        f"| Run ID | `{context.run_id}` |\n",
        f"| Modal app | `{context.modal_app_name}` |\n",
        f"| Modal environment | `{context.modal_environment}` |\n",
        f"| HF staging | `{context.hf_staging_prefix}` |\n",
    ]
    # The override row is only shown when an override was actually supplied.
    version_override = os.environ.get("VERSION_OVERRIDE")
    if version_override:
        lines.append(f"| Version override | `{version_override}` |\n")
    lines.extend(["\n", "```text\n", str(result), "\n```\n"])

    # Explicit UTF-8 so the summary does not depend on the runner's locale.
    with Path(summary_path).open("a", encoding="utf-8") as handle:
        handle.writelines(lines)
38+
39+
40+
def main() -> None:
    """Call the deployed ``promote_run`` Modal function for the current run.

    The run context is read from the environment. The Modal app name falls
    back to ``policyengine-us-data-pipeline`` and the Modal environment to
    the ``MODAL_ENVIRONMENT`` variable when the context does not supply
    them. A ``version`` argument is sent only when ``VERSION_OVERRIDE`` is
    set to a non-empty value. The remote result is printed and appended to
    the step summary.

    Raises:
        RuntimeError: If the resolved run context has no run ID.
    """
    context = RunContext.from_env()
    run_id = context.run_id
    if not run_id:
        raise RuntimeError("US_DATA_RUN_ID is required to promote a publication run.")

    app_name = context.modal_app_name or "policyengine-us-data-pipeline"
    environment_name = context.modal_environment or os.environ.get("MODAL_ENVIRONMENT")

    # Only name an environment in the lookup when one is actually known.
    lookup_options = {"environment_name": environment_name} if environment_name else {}
    promote_run = modal.Function.from_name(app_name, "promote_run", **lookup_options)

    request = {"run_id": run_id}
    version_override = os.environ.get("VERSION_OVERRIDE")
    if version_override:
        request["version"] = version_override

    print("Promoting publication run.")
    print(f"Run ID: {run_id}")
    print(f"Modal app: {app_name}")
    print(f"Modal environment: {environment_name}")
    print(f"HF staging prefix: {context.hf_staging_prefix}")
    print(f"Request: {json.dumps(request, sort_keys=True)}")

    outcome = promote_run.remote(**request)
    print(outcome)
    _append_summary(outcome, context)


if __name__ == "__main__":
    main()
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""Resolve run context for GitHub Actions workflows."""
2+
3+
from __future__ import annotations
4+
5+
import os
6+
import sys
7+
from pathlib import Path
8+
from typing import Mapping
9+
10+
_REPO_ROOT = Path(__file__).resolve().parents[2]
11+
if str(_REPO_ROOT) not in sys.path:
12+
sys.path.insert(0, str(_REPO_ROOT))
13+
14+
from policyengine_us_data.utils.run_context import ( # noqa: E402
15+
DEFAULT_MODAL_APP_PREFIX,
16+
RUN_ID_ENV,
17+
RunContext,
18+
build_modal_resource_name,
19+
build_run_id,
20+
)
21+
22+
23+
def _append_key_values(path_env: str, values: dict[str, str]) -> None:
    """Append entries to the GitHub Actions file named by ``path_env``.

    Does nothing when the environment variable ``path_env`` is unset or
    empty. Single-line values are written as ``key=value``. Values that
    contain a line break are written in the ``key<<DELIMITER`` multiline
    form, because a raw line break inside ``key=value`` would end the entry
    early and let the remainder be read as further entries.

    Args:
        path_env: Name of the environment variable holding the target file
            path (for example ``GITHUB_OUTPUT`` or ``GITHUB_ENV``).
        values: Mapping of entry names to values; each value is rendered
            with ``str()``.
    """
    output_path = os.environ.get(path_env)
    if not output_path:
        return

    entries = []
    for key, value in values.items():
        text = str(value)
        if "\n" in text or "\r" in text:
            # Grow the delimiter until it cannot occur anywhere in the
            # value, so the value can never terminate its own block.
            delimiter = "__US_DATA_EOF__"
            while delimiter in text:
                delimiter += "_"
            entries.append(f"{key}<<{delimiter}\n{text}\n{delimiter}\n")
        else:
            entries.append(f"{key}={text}\n")

    # Explicit UTF-8 so the output does not depend on the runner's locale.
    with Path(output_path).open("a", encoding="utf-8") as handle:
        handle.writelines(entries)
30+
31+
32+
def _github_actions_run_id(env: Mapping[str, str]) -> str:
    """Build a run ID from GitHub Actions variables found in ``env``.

    Args:
        env: Mapping to read ``GITHUB_RUN_ID``, ``GITHUB_RUN_ATTEMPT``
            (default ``"1"``) and ``GITHUB_SHA`` (default ``""``) from.

    Returns:
        The value produced by ``build_run_id``, or an empty string when
        ``GITHUB_RUN_ID`` is missing or empty.
    """
    github_run_id = env.get("GITHUB_RUN_ID")
    if github_run_id:
        return build_run_id(
            github_run_id=github_run_id,
            github_run_attempt=env.get("GITHUB_RUN_ATTEMPT", "1"),
            github_sha=env.get("GITHUB_SHA", ""),
        )
    return ""
40+
41+
42+
def _resolve_volume_name(
    env: Mapping[str, str],
    override_env: str,
    run_id: str,
    prefix: str,
) -> str:
    """Return the volume name from ``override_env`` or derive it from the run ID."""
    # GitHub Actions renders unset inputs and variables as empty strings, so
    # an empty override is treated the same as a missing one instead of
    # producing an empty volume name.
    return env.get(override_env) or build_modal_resource_name(run_id, prefix=prefix)


def main() -> None:
    """Resolve the run context and publish it to the GitHub Actions step.

    The run ID comes from the ``RUN_ID_ENV`` variable when set, otherwise it
    is built from the GitHub Actions run variables. Each volume name is
    taken from its ``US_DATA_*_VOLUME_NAME`` override when that is non-empty
    and is otherwise derived from the run ID. The resolved values are
    appended to the ``GITHUB_OUTPUT`` and ``GITHUB_ENV`` files and the
    context is printed as JSON.

    Raises:
        RuntimeError: If no run ID could be resolved.
    """
    env = os.environ
    app_prefix = env.get("US_DATA_MODAL_APP_PREFIX", DEFAULT_MODAL_APP_PREFIX)
    run_id = env.get(RUN_ID_ENV, "")
    context = RunContext.from_env(
        run_id=run_id or _github_actions_run_id(env),
        modal_app_prefix=app_prefix,
    )
    if not context.run_id:
        raise RuntimeError(
            "Could not resolve run ID. Set US_DATA_RUN_ID or run "
            "inside GitHub Actions with GITHUB_RUN_ID."
        )

    volume_names = {
        "pipeline_volume_name": _resolve_volume_name(
            env,
            "US_DATA_PIPELINE_VOLUME_NAME",
            context.run_id,
            "pipeline-artifacts",
        ),
        "staging_volume_name": _resolve_volume_name(
            env,
            "US_DATA_STAGING_VOLUME_NAME",
            context.run_id,
            "local-area-staging",
        ),
        "checkpoint_volume_name": _resolve_volume_name(
            env,
            "US_DATA_CHECKPOINT_VOLUME_NAME",
            context.run_id,
            "data-build-checkpoints",
        ),
    }
    # Rebuild the context so the resolved volume names travel with it.
    context = RunContext.from_mapping(
        {**context.to_dict(), **volume_names},
        modal_app_name=context.modal_app_name,
        modal_environment=context.modal_environment,
    )

    outputs = {
        "run_id": context.run_id,
        "modal_app_name": context.modal_app_name,
        "modal_environment": context.modal_environment,
        "hf_staging_prefix": context.hf_staging_prefix,
        "github_run_url": context.github_run_url,
        "pipeline_volume_name": context.pipeline_volume_name,
        "staging_volume_name": context.staging_volume_name,
        "checkpoint_volume_name": context.checkpoint_volume_name,
    }
    _append_key_values("GITHUB_OUTPUT", outputs)
    _append_key_values("GITHUB_ENV", context.export_env())
    print(context.to_json())


if __name__ == "__main__":
    main()

.github/scripts/spawn_modal_pipeline.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
import os
2+
import sys
23
from pathlib import Path
34

45
import modal
56

7+
_REPO_ROOT = Path(__file__).resolve().parents[2]
8+
if str(_REPO_ROOT) not in sys.path:
9+
sys.path.insert(0, str(_REPO_ROOT))
10+
11+
from policyengine_us_data.utils.run_context import RunContext # noqa: E402
12+
613

714
def _as_bool(value: str) -> bool:
    """Return True only when ``value`` spells "true" in any letter case."""
    normalized = value.lower()
    return normalized == "true"
916

1017

11-
def _append_summary(function_call_id: str) -> None:
18+
def _append_summary(function_call_id: str, context: RunContext) -> None:
1219
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
1320
if not summary_path:
1421
return
@@ -23,13 +30,20 @@ def _append_summary(function_call_id: str) -> None:
2330
f"`{os.environ['EPOCHS']}` / "
2431
f"`{os.environ['NATIONAL_EPOCHS']}` |\n"
2532
)
33+
handle.write(f"| Run ID | `{context.run_id}` |\n")
34+
handle.write(f"| Modal app | `{context.modal_app_name}` |\n")
35+
handle.write(f"| Modal environment | `{context.modal_environment}` |\n")
36+
handle.write(f"| HF staging | `{context.hf_staging_prefix}` |\n")
37+
if os.environ.get("SOURCE_SHA"):
38+
handle.write(f"| Source SHA | `{os.environ['SOURCE_SHA']}` |\n")
2639
handle.write(f"| Function call ID | `{function_call_id}` |\n\n")
2740
handle.write("**[Monitor on Modal Dashboard](https://modal.com/apps)**\n")
2841

2942

3043
def main() -> None:
31-
app_name = os.environ.get("MODAL_APP_NAME", "policyengine-us-data-pipeline")
32-
environment_name = os.environ.get("MODAL_ENVIRONMENT")
44+
context = RunContext.from_env()
45+
app_name = context.modal_app_name or "policyengine-us-data-pipeline"
46+
environment_name = context.modal_environment or os.environ.get("MODAL_ENVIRONMENT")
3347
kwargs = {
3448
"branch": os.environ.get("PIPELINE_BRANCH", "main"),
3549
"gpu": os.environ["GPU"],
@@ -39,6 +53,11 @@ def main() -> None:
3953
"skip_national": _as_bool(os.environ["SKIP_NATIONAL"]),
4054
"resume_run_id": os.environ.get("RESUME_RUN_ID") or None,
4155
"version_override": os.environ.get("VERSION_OVERRIDE", ""),
56+
"sha_override": os.environ.get("SOURCE_SHA", ""),
57+
"run_id": context.run_id,
58+
"run_context": context.to_dict(),
59+
"modal_app_name": context.modal_app_name,
60+
"modal_environment": context.modal_environment,
4261
}
4362
if environment_name:
4463
run_pipeline = modal.Function.from_name(
@@ -50,8 +69,14 @@ def main() -> None:
5069
run_pipeline = modal.Function.from_name(app_name, "run_pipeline")
5170
function_call = run_pipeline.spawn(**kwargs)
5271
print("Pipeline spawned.")
72+
print(f"Run ID: {context.run_id}")
73+
print(f"Modal app: {app_name}")
74+
print(f"Modal environment: {environment_name}")
75+
print(f"HF staging prefix: {context.hf_staging_prefix}")
76+
if os.environ.get("SOURCE_SHA"):
77+
print(f"Source SHA: {os.environ['SOURCE_SHA']}")
5378
print(f"Function call ID: {function_call.object_id}")
54-
_append_summary(function_call.object_id)
79+
_append_summary(function_call.object_id, context)
5580

5681

5782
if __name__ == "__main__":
Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
1-
name: Promote Local Area H5 Files
1+
name: Promote US Data Release
22

33
on:
44
workflow_dispatch:
55
inputs:
66
run_id:
7-
description: 'Run ID to promote (e.g. 1.23.0_a1b2c3d4_20260407_120000)'
7+
description: 'Run ID to promote (e.g. usdata-gha123456-a1-abcdef12)'
88
required: true
99
type: string
10-
branch:
11-
description: 'Branch to use for repo setup'
10+
version:
11+
description: 'Optional version override; defaults to run metadata'
1212
required: false
13-
default: 'main'
13+
default: ''
1414
type: string
1515

1616
jobs:
17-
promote-local-area:
17+
promote-release:
1818
runs-on: ubuntu-latest
1919
permissions:
2020
contents: read
2121
env:
2222
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
2323
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
2424
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
25+
MODAL_ENVIRONMENT: main
26+
US_DATA_MODAL_APP_PREFIX: policyengine-us-data-pub
27+
US_DATA_RUN_ID: ${{ github.event.inputs.run_id }}
28+
VERSION_OVERRIDE: ${{ github.event.inputs.version }}
2529

2630
steps:
2731
- name: Checkout repo
@@ -35,9 +39,9 @@ jobs:
3539
- name: Install Modal CLI
3640
run: pip install modal
3741

38-
- name: Promote staged files to production
39-
run: |
40-
RUN_ID="${{ github.event.inputs.run_id }}"
41-
BRANCH="${{ github.event.inputs.branch }}"
42-
echo "Promoting run ${RUN_ID} from branch ${BRANCH}"
43-
modal run modal_app/local_area.py::main_promote --run-id="${RUN_ID}" --branch="${BRANCH}"
42+
- name: Resolve run context
43+
id: run-context
44+
run: python .github/scripts/resolve_run_context.py
45+
46+
- name: Promote staged release to production
47+
run: python .github/scripts/promote_publication_pipeline.py

0 commit comments

Comments
 (0)