|
810 | 810 | "docstring": "Replace clone-half person-level feature variables with donor matches.", |
811 | 811 | "id": "clone_features", |
812 | 812 | "kind": "function", |
813 | | - "line": 404, |
| 813 | + "line": 405, |
814 | 814 | "metadata": { |
815 | 815 | "api_refs": [ |
816 | 816 | "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions" |
|
873 | 873 | "docstring": "Assert that final exported variables are leaf inputs.", |
874 | 874 | "id": "computed_export_contract", |
875 | 875 | "kind": "function", |
876 | | - "line": 1267, |
| 876 | + "line": 1449, |
877 | 877 | "metadata": { |
878 | 878 | "api_refs": [ |
879 | 879 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported" |
|
967 | 967 | "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).", |
968 | 968 | "id": "cps_only", |
969 | 969 | "kind": "function", |
970 | | - "line": 443, |
| 970 | + "line": 444, |
971 | 971 | "metadata": { |
972 | 972 | "api_refs": [ |
973 | 973 | "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables" |
|
1175 | 1175 | "signature": "def derive_geography_from_blocks(block_geoids: np.ndarray) -> Dict[str, np.ndarray]", |
1176 | 1176 | "source_file": "policyengine_us_data/calibration/block_assignment.py" |
1177 | 1177 | }, |
| 1178 | + "housing_assistance_microsim_validation": { |
| 1179 | + "docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.", |
| 1180 | + "id": "housing_assistance_microsim_validation", |
| 1181 | + "kind": "function", |
| 1182 | + "line": 1321, |
| 1183 | + "metadata": { |
| 1184 | + "api_refs": [ |
| 1185 | + "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation" |
| 1186 | + ], |
| 1187 | + "artifacts_in": [ |
| 1188 | + "extended_cps_stage2" |
| 1189 | + ], |
| 1190 | + "artifacts_out": [ |
| 1191 | + "housing_validated_extended_cps" |
| 1192 | + ], |
| 1193 | + "description": "Runs a temporary microsimulation before final export to ensure housing-assistance leaf inputs reconstruct nonzero modeled housing assistance and capped SPM housing subsidy.", |
| 1194 | + "id": "housing_assistance_microsim_validation", |
| 1195 | + "label": "Validate Housing Assistance Microsimulation", |
| 1196 | + "node_type": "process", |
| 1197 | + "pathways": [ |
| 1198 | + "data_build" |
| 1199 | + ], |
| 1200 | + "pydoc": true, |
| 1201 | + "source_file": "policyengine_us_data/datasets/cps/extended_cps.py", |
| 1202 | + "stability": "moving", |
| 1203 | + "status": "transitional" |
| 1204 | + }, |
| 1205 | + "object_path": "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation", |
| 1206 | + "signature": "def _validate_housing_assistance_microsimulation(cls, data, time_period, microsimulation_cls = None)", |
| 1207 | + "source_file": "policyengine_us_data/datasets/cps/extended_cps.py" |
| 1208 | + }, |
1178 | 1209 | "impute_puf_demographics": { |
1179 | 1210 | "docstring": "", |
1180 | 1211 | "id": "impute_puf_demographics", |
|
2619 | 2650 | "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.", |
2620 | 2651 | "id": "qrf_pass2", |
2621 | 2652 | "kind": "function", |
2622 | | - "line": 749, |
| 2653 | + "line": 790, |
2623 | 2654 | "metadata": { |
2624 | 2655 | "api_refs": [ |
2625 | 2656 | "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions" |
|
0 commit comments