Build Long-Run Projection Dataset #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Build Long-Run Projection Dataset

# Manual trigger only; every knob below is supplied from the dispatch form.
on:
  workflow_dispatch:
    inputs:
      # --- Core run selection -------------------------------------------
      years:
        description: 'Projection years/ranges to build'
        required: true
        default: '2026-2035,2040,2045,2050,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100'
        type: string
      jobs:
        description: 'Parallel year subprocesses'
        required: true
        default: '4'
        type: string
      profile:
        description: 'Long-run calibration profile'
        required: true
        default: 'ss-payroll-tob'
        type: string
      target_source:
        description: 'Named long-term target source'
        required: true
        default: 'trustees_2025_current_law'
        type: string
      tax_assumption:
        description: 'Long-run federal tax assumption'
        required: true
        default: 'trustees-2025-core-thresholds-v1'
        type: string
      base_dataset:
        description: 'Optional base H5 path or hf:// URL; blank uses runner default'
        required: false
        default: ''
        type: string

      # --- Support augmentation (all optional) --------------------------
      # String inputs default to blank; the build step only forwards the
      # non-blank ones as CLI flags.
      support_augmentation_profile:
        description: 'Optional late-year support augmentation profile'
        required: false
        default: ''
        type: string
      support_augmentation_target_year:
        description: 'Optional fixed support augmentation target year'
        required: false
        default: ''
        type: string
      support_augmentation_align_to_run_year:
        description: 'Rebuild support augmentation separately for each run year'
        required: false
        default: false
        type: boolean
      support_augmentation_start_year:
        description: 'Optional earliest support augmentation year'
        required: false
        default: ''
        type: string
      support_augmentation_top_n_targets:
        description: 'Optional number of synthetic target types to map to donors'
        required: false
        default: ''
        type: string
      support_augmentation_donors_per_target:
        description: 'Optional number of real donor tax units per target'
        required: false
        default: ''
        type: string
      support_augmentation_max_distance:
        description: 'Optional maximum donor-match distance'
        required: false
        default: ''
        type: string
      support_augmentation_clone_weight_scale:
        description: 'Optional baseline donor-clone weight multiplier'
        required: false
        default: ''
        type: string
      support_augmentation_blueprint_base_weight_scale:
        description: 'Optional donor-composite blueprint base-weight scale'
        required: false
        default: ''
        type: string
      support_augmentation_sanitize_worker_non_target_income:
        description: 'Zero worker-donor clone investment and retirement income'
        required: false
        default: false
        type: boolean
      support_augmentation_sanitize_clone_non_target_income:
        description: 'Zero all donor-clone investment and retirement income'
        required: false
        default: false
        type: boolean

      # --- Run control, provenance, and publishing ----------------------
      allow_validation_failures:
        description: 'Allow invalid artifacts to be written for diagnostics'
        required: false
        default: false
        type: boolean
      upload_to_hf_staging:
        description: 'Upload generated H5s and metadata to run-scoped HF staging'
        required: false
        default: false
        type: boolean
      run_id:
        description: 'Optional run ID; blank derives one from the GitHub run'
        required: false
        default: ''
        type: string
      source_sha:
        description: 'Exact policyengine-us-data commit SHA or ref to checkout'
        required: false
        default: ''
        type: string
      allow_stale_policyengine_us:
        description: 'Allow production build when policyengine-us lags the latest PyPI release'
        required: false
        default: false
        type: boolean
# The group key includes both the run ID and the attempt number, so each
# attempt lands in its own group; in-progress runs are never cancelled.
concurrency:
  group: "long-run-projection-${{ github.run_id }}-${{ github.run_attempt }}"
  cancel-in-progress: false
jobs:
  # Single job: resolve the run context, build the long-run projection
  # artifacts, then publish manifests/logs and a step summary. The last two
  # steps run even when the build fails (`if: always()`).
  build-long-run:
    runs-on: ubuntu-latest
    timeout-minutes: 360
    permissions:
      contents: read
    env:
      # Empty string when the dispatcher left `run_id` blank.
      # NOTE(review): presumably consumed by resolve_run_context.py — confirm.
      US_DATA_RUN_ID: ${{ inputs.run_id || '' }}
    steps:
      # Check out the requested ref, falling back to the triggering commit.
      - uses: actions/checkout@v6
        with:
          ref: ${{ inputs.source_sha || github.sha }}
      - uses: actions/setup-python@v6
        with:
          python-version: "3.14"
      - uses: astral-sh/setup-uv@v8.1.0
      - name: Resolve run context
        id: run-context
        run: |
          # Record the commit that was actually checked out so later steps
          # can reference it, and hand the same value to the resolver script
          # in place of the default GITHUB_SHA.
          checked_out_sha="$(git rev-parse HEAD)"
          echo "CHECKED_OUT_SHA=${checked_out_sha}" >> "$GITHUB_ENV"
          GITHUB_SHA="${checked_out_sha}" python .github/scripts/resolve_run_context.py
      - name: Require current PolicyEngine US dependency
        env:
          POLICYENGINE_US_ALLOW_STALE: ${{ inputs.allow_stale_policyengine_us }}
        run: python .github/scripts/check_policyengine_us_dependency.py --mode fail
      - name: Install dependencies
        run: uv sync --dev
      - name: Build long-run projection artifacts
        # All workflow inputs and step outputs reach the script through
        # environment variables rather than `${{ }}` interpolation inside the
        # shell body, so user-supplied text is never parsed as shell code.
        env:
          ALLOW_VALIDATION_FAILURES: ${{ inputs.allow_validation_failures }}
          BASE_DATASET: ${{ inputs.base_dataset }}
          # The token is only exposed when a staging upload was requested.
          HUGGING_FACE_TOKEN: ${{ inputs.upload_to_hf_staging && secrets.HUGGING_FACE_TOKEN || '' }}
          JOBS: ${{ inputs.jobs }}
          OUTPUT_DIR: projected_long_term/${{ steps.run-context.outputs.run_id }}
          PROFILE: ${{ inputs.profile }}
          RUN_ID: ${{ steps.run-context.outputs.run_id }}
          SUPPORT_AUGMENTATION_ALIGN_TO_RUN_YEAR: ${{ inputs.support_augmentation_align_to_run_year }}
          SUPPORT_AUGMENTATION_BLUEPRINT_BASE_WEIGHT_SCALE: ${{ inputs.support_augmentation_blueprint_base_weight_scale }}
          SUPPORT_AUGMENTATION_CLONE_WEIGHT_SCALE: ${{ inputs.support_augmentation_clone_weight_scale }}
          SUPPORT_AUGMENTATION_DONORS_PER_TARGET: ${{ inputs.support_augmentation_donors_per_target }}
          SUPPORT_AUGMENTATION_MAX_DISTANCE: ${{ inputs.support_augmentation_max_distance }}
          SUPPORT_AUGMENTATION_PROFILE: ${{ inputs.support_augmentation_profile }}
          SUPPORT_AUGMENTATION_SANITIZE_CLONE_NON_TARGET_INCOME: ${{ inputs.support_augmentation_sanitize_clone_non_target_income }}
          SUPPORT_AUGMENTATION_SANITIZE_WORKER_NON_TARGET_INCOME: ${{ inputs.support_augmentation_sanitize_worker_non_target_income }}
          SUPPORT_AUGMENTATION_START_YEAR: ${{ inputs.support_augmentation_start_year }}
          SUPPORT_AUGMENTATION_TARGET_YEAR: ${{ inputs.support_augmentation_target_year }}
          SUPPORT_AUGMENTATION_TOP_N_TARGETS: ${{ inputs.support_augmentation_top_n_targets }}
          TARGET_SOURCE: ${{ inputs.target_source }}
          TAX_ASSUMPTION: ${{ inputs.tax_assumption }}
          UPLOAD_TO_HF_STAGING: ${{ inputs.upload_to_hf_staging }}
          YEARS: ${{ inputs.years }}
        run: |
          set -euo pipefail
          SOURCE_SHA="${CHECKED_OUT_SHA}"
          # Required arguments; optional flags are appended below only when
          # the matching input is non-blank (strings) or "true" (booleans).
          cmd=(
            uv run python policyengine_us_data/datasets/cps/long_term/run_long_term_production.py
            --years "${YEARS}"
            --jobs "${JOBS}"
            --output-dir "${OUTPUT_DIR}"
            --profile "${PROFILE}"
            --target-source "${TARGET_SOURCE}"
            --tax-assumption "${TAX_ASSUMPTION}"
            --run-id "${RUN_ID}"
            --source-sha "${SOURCE_SHA}"
          )
          if [ -n "${BASE_DATASET}" ]; then
            cmd+=(--base-dataset "${BASE_DATASET}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_PROFILE}" ]; then
            cmd+=(--support-augmentation-profile "${SUPPORT_AUGMENTATION_PROFILE}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_TARGET_YEAR}" ]; then
            cmd+=(--support-augmentation-target-year "${SUPPORT_AUGMENTATION_TARGET_YEAR}")
          fi
          if [ "${SUPPORT_AUGMENTATION_ALIGN_TO_RUN_YEAR}" = "true" ]; then
            cmd+=(--support-augmentation-align-to-run-year)
          fi
          if [ -n "${SUPPORT_AUGMENTATION_START_YEAR}" ]; then
            cmd+=(--support-augmentation-start-year "${SUPPORT_AUGMENTATION_START_YEAR}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_TOP_N_TARGETS}" ]; then
            cmd+=(--support-augmentation-top-n-targets "${SUPPORT_AUGMENTATION_TOP_N_TARGETS}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_DONORS_PER_TARGET}" ]; then
            cmd+=(--support-augmentation-donors-per-target "${SUPPORT_AUGMENTATION_DONORS_PER_TARGET}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_MAX_DISTANCE}" ]; then
            cmd+=(--support-augmentation-max-distance "${SUPPORT_AUGMENTATION_MAX_DISTANCE}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_CLONE_WEIGHT_SCALE}" ]; then
            cmd+=(--support-augmentation-clone-weight-scale "${SUPPORT_AUGMENTATION_CLONE_WEIGHT_SCALE}")
          fi
          if [ -n "${SUPPORT_AUGMENTATION_BLUEPRINT_BASE_WEIGHT_SCALE}" ]; then
            cmd+=(--support-augmentation-blueprint-base-weight-scale "${SUPPORT_AUGMENTATION_BLUEPRINT_BASE_WEIGHT_SCALE}")
          fi
          if [ "${SUPPORT_AUGMENTATION_SANITIZE_WORKER_NON_TARGET_INCOME}" = "true" ]; then
            cmd+=(--support-augmentation-sanitize-worker-non-target-income)
          fi
          if [ "${SUPPORT_AUGMENTATION_SANITIZE_CLONE_NON_TARGET_INCOME}" = "true" ]; then
            cmd+=(--support-augmentation-sanitize-clone-non-target-income)
          fi
          if [ "${ALLOW_VALIDATION_FAILURES}" = "true" ]; then
            cmd+=(--allow-validation-failures)
          fi
          if [ "${UPLOAD_TO_HF_STAGING}" = "true" ]; then
            cmd+=(--upload-to-hf-staging)
          fi
          "${cmd[@]}"
      - name: Upload manifests and logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: long-run-projection-manifests-${{ steps.run-context.outputs.run_id }}
          if-no-files-found: warn
          # upload-artifact skips dot-prefixed files and directories unless
          # told otherwise; without this the `.parallel_logs/` entries below
          # would be silently left out of the artifact.
          include-hidden-files: true
          path: |
            projected_long_term/${{ steps.run-context.outputs.run_id }}/long_run_production_manifest.json
            projected_long_term/${{ steps.run-context.outputs.run_id }}/calibration_manifest.json
            projected_long_term/${{ steps.run-context.outputs.run_id }}/*.h5.metadata.json
            projected_long_term/${{ steps.run-context.outputs.run_id }}/support_augmentation_report*.json
            projected_long_term/${{ steps.run-context.outputs.run_id }}/.parallel_logs/*.log
      - name: Summarize run
        if: always()
        env:
          PROFILE: ${{ inputs.profile }}
          RUN_ID: ${{ steps.run-context.outputs.run_id }}
          TARGET_SOURCE: ${{ inputs.target_source }}
          TAX_ASSUMPTION: ${{ inputs.tax_assumption }}
          UPLOAD_TO_HF_STAGING: ${{ inputs.upload_to_hf_staging }}
          YEARS: ${{ inputs.years }}
        run: |
          # Append a Markdown recap of the run parameters to the job summary.
          {
            echo "## Long-run projection build"
            echo ""
            echo "- Run ID: \`${RUN_ID}\`"
            echo "- Years: \`${YEARS}\`"
            echo "- Profile: \`${PROFILE}\`"
            echo "- Target source: \`${TARGET_SOURCE}\`"
            echo "- Tax assumption: \`${TAX_ASSUMPTION}\`"
            echo "- HF staging upload: \`${UPLOAD_TO_HF_STAGING}\`"
            if [ "${UPLOAD_TO_HF_STAGING}" = "true" ]; then
              echo "- HF staging prefix: \`staging/${CHECKED_OUT_SHA}-${RUN_ID}/long_term/\`"
            fi
          } >> "$GITHUB_STEP_SUMMARY"