|
474 | 474 | "docstring": "Build all datasets with preemption-resilient checkpointing.\n\nArgs:\n upload: Whether to upload completed datasets.\n branch: Git branch to build from.\n sequential: Use sequential (non-parallel) execution.\n clear_checkpoints: Clear existing checkpoints before starting.\n skip_tests: Skip running the test suite (useful for calibration runs).\n skip_enhanced_cps: Skip enhanced_cps.py and small_enhanced_cps.py\n (useful for calibration runs that only need source_imputed H5).\n skip_stage_5: Skip source-imputed CPS and small enhanced CPS after\n enhanced_cps_2024.h5 is built.\n stage_only: Upload to HF staging only, without promoting a release.\n version: policyengine-us-data package version used for staging and\n dataset-build contracts.", |
475 | 475 | "id": "build_datasets", |
476 | 476 | "kind": "function", |
477 | | - "line": 536, |
| 477 | + "line": 547, |
478 | 478 | "metadata": { |
479 | 479 | "api_refs": [ |
480 | 480 | "modal_app.data_build.build_datasets" |
481 | 481 | ], |
482 | 482 | "artifacts_out": [ |
483 | | - "source_imputed_*.h5", |
| 483 | + "dataset_build_output.json", |
| 484 | + "dataset_inventory.json", |
| 485 | + "source_dataset_schema_summary.json", |
| 486 | + "target_database_schema_summary.json", |
| 487 | + "source_imputed_stratified_extended_cps_2024.h5", |
| 488 | + "source_imputed_stratified_extended_cps.h5", |
484 | 489 | "policy_data.db" |
485 | 490 | ], |
486 | 491 | "description": "Build base datasets, source-imputed artifacts, and optional uploads inside the Modal runtime.", |
|
999 | 1004 | "docstring": "Build CPS before PUF because PUF pension imputation loads CPS_2024.", |
1000 | 1005 | "id": "cps_puf_build_phase", |
1001 | 1006 | "kind": "function", |
1002 | | - "line": 404, |
| 1007 | + "line": 406, |
1003 | 1008 | "metadata": { |
1004 | 1009 | "api_refs": [ |
1005 | 1010 | "modal_app.data_build.run_cps_then_puf_phase" |
|
1155 | 1160 | "signature": "def fit_artifacts_for_scope(scope: FitScope | str) -> ScopedFitArtifacts", |
1156 | 1161 | "source_file": "policyengine_us_data/fit_weights/artifacts.py" |
1157 | 1162 | }, |
| 1163 | + "fitted_weights_output_bundle": { |
| 1164 | + "docstring": "Scoped output bundle created before Stage 3 bytes become files.", |
| 1165 | + "id": "fitted_weights_output_bundle", |
| 1166 | + "kind": "class", |
| 1167 | + "line": 113, |
| 1168 | + "metadata": { |
| 1169 | + "api_refs": [ |
| 1170 | + "policyengine_us_data.fit_weights.bundles.FittedWeightsOutputBundle" |
| 1171 | + ], |
| 1172 | + "artifacts_in": [ |
| 1173 | + "remote fit result bytes" |
| 1174 | + ], |
| 1175 | + "artifacts_out": [ |
| 1176 | + "scoped fitted-weight artifact writes" |
| 1177 | + ], |
| 1178 | + "description": "Scoped Stage 3 result bytes before artifact file writes.", |
| 1179 | + "id": "fitted_weights_output_bundle", |
| 1180 | + "label": "Fitted Weights Output Bundle", |
| 1181 | + "node_type": "library", |
| 1182 | + "pathways": [ |
| 1183 | + "fit_weights", |
| 1184 | + "artifact_identity" |
| 1185 | + ], |
| 1186 | + "source_file": "policyengine_us_data/fit_weights/bundles.py", |
| 1187 | + "stability": "moving", |
| 1188 | + "status": "current", |
| 1189 | + "validation_commands": [ |
| 1190 | + "uv run pytest tests/unit/fit_weights/test_bundles.py" |
| 1191 | + ] |
| 1192 | + }, |
| 1193 | + "object_path": "policyengine_us_data.fit_weights.bundles.FittedWeightsOutputBundle", |
| 1194 | + "signature": "class FittedWeightsOutputBundle", |
| 1195 | + "source_file": "policyengine_us_data/fit_weights/bundles.py" |
| 1196 | + }, |
1158 | 1197 | "fitted_weights_spec": { |
1159 | 1198 | "docstring": "Return the current fitted-weight spec for a regional or national scope.", |
1160 | 1199 | "id": "fitted_weights_spec", |
|
3013 | 3052 | "docstring": "Promote a completed pipeline run to production.\n\n1. Verify run status is \"completed\"\n2. Promote every staged artifact in one Hugging Face commit\n3. Upload/copy every artifact to GCS\n4. Finalize release_manifest.json, tag the release, and update\n version_manifest.json\n5. Update run status to \"promoted\"\n\nArgs:\n run_id: The run ID to promote.\n candidate_version: Candidate staging scope used for staged source files.\n release_version: Stable version used for final release metadata.\n\nReturns:\n Summary message.", |
3014 | 3053 | "id": "promote_pipeline_run", |
3015 | 3054 | "kind": "function", |
3016 | | - "line": 1893, |
| 3055 | + "line": 1884, |
3017 | 3056 | "metadata": { |
3018 | 3057 | "api_refs": [ |
3019 | 3058 | "modal_app.pipeline.promote_run" |
|
3400 | 3439 | "docstring": "Run the full pipeline end-to-end.\n\nArgs:\n branch: Git branch to build from.\n gpu: GPU type for regional calibration.\n epochs: Training epochs for regional calibration.\n national_gpu: GPU type for national calibration.\n national_epochs: Training epochs for national.\n num_workers: Number of parallel H5 workers.\n n_clones: Number of clones for H5 building.\n skip_national: Skip national calibration/H5.\n resume_run_id: Resume a previously failed run.\n clear_checkpoints: Wipe ALL checkpoints before building\n (default False). Normally not needed \u2014 checkpoints are\n scoped by commit SHA, so stale ones from other commits\n are cleaned automatically. Use True only to force a\n full rebuild of the current commit.\n candidate_version: Candidate staging scope used for HF staging.\n release_version: Final stable release version. Usually empty until\n promotion.\n base_release_version: Stable release current when this candidate was\n built.\n release_bump: Intended SemVer bump for this candidate.\n sha_override: Exact source SHA deployed by GitHub Actions. When\n provided, this is recorded instead of reading the current\n branch tip.\n run_id: Cross-system run ID created by GitHub.\n run_context: Serialized run context from the launcher workflow.\n modal_app_name: Deployed Modal app name for this run.\n modal_environment: Modal environment used for this run.\n chunked_matrix: Build the calibration matrix in clone-household\n chunks instead of the non-chunked path. Opt-in; default off.\n chunk_size: Clone-household columns per chunk when\n ``chunked_matrix`` is True.\n parallel_matrix: Fan chunked matrix building across Modal\n workers via ``build_matrix_chunk_worker``. Only meaningful\n when ``chunked_matrix`` is True; ignored otherwise.\n num_matrix_workers: Number of Modal workers when\n ``parallel_matrix`` is True.\n\nReturns:\n The run ID for use with promote.", |
3401 | 3440 | "id": "run_modal_pipeline", |
3402 | 3441 | "kind": "function", |
3403 | | - "line": 909, |
| 3442 | + "line": 917, |
3404 | 3443 | "metadata": { |
3405 | 3444 | "api_refs": [ |
3406 | 3445 | "modal_app.pipeline.run_pipeline" |
|
3656 | 3695 | "docstring": "Return all artifact specs known to the Stage 1 dataset build.", |
3657 | 3696 | "id": "stage_1_dataset_artifact_specs", |
3658 | 3697 | "kind": "function", |
3659 | | - "line": 231, |
| 3698 | + "line": 285, |
3660 | 3699 | "metadata": { |
3661 | 3700 | "api_refs": [ |
3662 | 3701 | "policyengine_us_data.build_datasets.artifacts.stage_1_artifact_specs" |
|
3676 | 3715 | "small_enhanced_cps_2024.h5", |
3677 | 3716 | "source_imputed_stratified_extended_cps.h5", |
3678 | 3717 | "policy_data.db", |
| 3718 | + "calibration_weights.npy", |
3679 | 3719 | "build_log.txt", |
3680 | | - "data_build_checkpoint_stats.json" |
| 3720 | + "data_build_checkpoint_stats.json", |
| 3721 | + "dataset_inventory.json", |
| 3722 | + "source_dataset_schema_summary.json", |
| 3723 | + "target_database_schema_summary.json" |
3681 | 3724 | ], |
3682 | 3725 | "description": "Canonical artifact inventory for Stage 1 dataset-build outputs.", |
3683 | 3726 | "id": "stage_1_dataset_artifact_specs", |
|
4042 | 4085 | "docstring": "Verify deployed-image imports and subprocess seams.", |
4043 | 4086 | "id": "verify_runtime_seams", |
4044 | 4087 | "kind": "function", |
4045 | | - "line": 536, |
| 4088 | + "line": 544, |
4046 | 4089 | "metadata": { |
4047 | 4090 | "api_refs": [ |
4048 | 4091 | "modal_app.pipeline.verify_runtime_seams" |
|
0 commit comments