|
3 | 3 | "docstring": "Set 2025 ACA take-up to match APTC enrollment targets.", |
4 | 4 | "id": "aca_2025_override", |
5 | 5 | "kind": "function", |
6 | | - "line": 404, |
| 6 | + "line": 420, |
7 | 7 | "metadata": { |
8 | 8 | "api_refs": [ |
9 | 9 | "policyengine_us_data.datasets.cps.enhanced_cps.create_aca_2025_takeup_override" |
|
34 | 34 | "docstring": "Impute rent and real_estate_taxes from ACS with state.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
35 | 35 | "id": "acs_qrf", |
36 | 36 | "kind": "function", |
37 | | - "line": 524, |
| 37 | + "line": 525, |
38 | 38 | "metadata": { |
39 | 39 | "api_refs": [ |
40 | 40 | "policyengine_us_data.calibration.source_impute._impute_acs" |
|
61 | 61 | "docstring": "\"Add auto loan balance, interest and net_worth variable.", |
62 | 62 | "id": "add_auto_loan", |
63 | 63 | "kind": "function", |
64 | | - "line": 3080, |
| 64 | + "line": 3063, |
65 | 65 | "metadata": { |
66 | 66 | "api_refs": [ |
67 | 67 | "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth" |
|
88 | 88 | "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n cps: Output CPS H5 group receiving derived household variables.\n household: Raw CPS household table.", |
89 | 89 | "id": "add_household_variables", |
90 | 90 | "kind": "function", |
91 | | - "line": 1673, |
| 91 | + "line": 1656, |
92 | 92 | "metadata": { |
93 | 93 | "api_refs": [ |
94 | 94 | "policyengine_us_data.datasets.cps.cps.add_household_variables" |
|
142 | 142 | "docstring": "Impute ORG-derived labor-market inputs and derive overtime premium.", |
143 | 143 | "id": "add_org_inputs", |
144 | 144 | "kind": "function", |
145 | | - "line": 2980, |
| 145 | + "line": 2963, |
146 | 146 | "metadata": { |
147 | 147 | "api_refs": [ |
148 | 148 | "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs" |
|
223 | 223 | "docstring": "", |
224 | 224 | "id": "add_previous_year_income", |
225 | 225 | "kind": "function", |
226 | | - "line": 1715, |
| 226 | + "line": 1698, |
227 | 227 | "metadata": { |
228 | 228 | "api_refs": [ |
229 | 229 | "policyengine_us_data.datasets.cps.cps.add_previous_year_income" |
|
277 | 277 | "docstring": "", |
278 | 278 | "id": "add_spm_variables", |
279 | 279 | "kind": "function", |
280 | | - "line": 1634, |
| 280 | + "line": 1617, |
281 | 281 | "metadata": { |
282 | 282 | "api_refs": [ |
283 | 283 | "policyengine_us_data.datasets.cps.cps.add_spm_variables" |
|
304 | 304 | "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status", |
305 | 305 | "id": "add_ssn_card_type", |
306 | 306 | "kind": "function", |
307 | | - "line": 1821, |
| 307 | + "line": 1804, |
308 | 308 | "metadata": { |
309 | 309 | "api_refs": [ |
310 | 310 | "policyengine_us_data.datasets.cps.cps.add_ssn_card_type" |
|
358 | 358 | "docstring": "", |
359 | 359 | "id": "add_tips", |
360 | 360 | "kind": "function", |
361 | | - "line": 2720, |
| 361 | + "line": 2703, |
362 | 362 | "metadata": { |
363 | 363 | "api_refs": [ |
364 | 364 | "policyengine_us_data.datasets.cps.cps.add_tips" |
|
815 | 815 | "docstring": "Replace clone-half person-level feature variables with donor matches.", |
816 | 816 | "id": "clone_features", |
817 | 817 | "kind": "function", |
818 | | - "line": 607, |
| 818 | + "line": 603, |
819 | 819 | "metadata": { |
820 | 820 | "api_refs": [ |
821 | 821 | "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions" |
|
878 | 878 | "docstring": "Assert that final exported variables are leaf inputs.", |
879 | 879 | "id": "computed_export_contract", |
880 | 880 | "kind": "function", |
881 | | - "line": 1802, |
| 881 | + "line": 1782, |
882 | 882 | "metadata": { |
883 | 883 | "api_refs": [ |
884 | 884 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported" |
|
972 | 972 | "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).", |
973 | 973 | "id": "cps_only", |
974 | 974 | "kind": "function", |
975 | | - "line": 646, |
| 975 | + "line": 642, |
976 | 976 | "metadata": { |
977 | 977 | "api_refs": [ |
978 | 978 | "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables" |
|
1325 | 1325 | "docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.", |
1326 | 1326 | "id": "housing_assistance_microsim_validation", |
1327 | 1327 | "kind": "function", |
1328 | | - "line": 1572, |
| 1328 | + "line": 1552, |
1329 | 1329 | "metadata": { |
1330 | 1330 | "api_refs": [ |
1331 | 1331 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation" |
|
3243 | 3243 | "docstring": "Run QRF imputation for PUF variables.\n\nStratified-subsamples PUF records (top 0.5% by AGI kept,\nrest randomly sampled to ~20K total), trains QRF, and\npredicts on CPS data.\n\nArgs:\n data: CPS data dict.\n time_period: Tax year.\n puf_dataset: PUF dataset class or path.\n dataset_path: Path to CPS h5 for computing\n demographic predictors via Microsimulation.\n\nReturns:\n Tuple of (y_full_imputations, y_override_imputations)\n as dicts of {variable: np.ndarray}.", |
3244 | 3244 | "id": "puf_qrf_pass", |
3245 | 3245 | "kind": "function", |
3246 | | - "line": 914, |
| 3246 | + "line": 898, |
3247 | 3247 | "metadata": { |
3248 | 3248 | "api_refs": [ |
3249 | 3249 | "policyengine_us_data.calibration.puf_impute._run_qrf_imputation" |
|
3270 | 3270 | "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.", |
3271 | 3271 | "id": "qrf_pass2", |
3272 | 3272 | "kind": "function", |
3273 | | - "line": 1037, |
| 3273 | + "line": 1017, |
3274 | 3274 | "metadata": { |
3275 | 3275 | "api_refs": [ |
3276 | 3276 | "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions" |
|
3562 | 3562 | "docstring": "", |
3563 | 3563 | "id": "reweight", |
3564 | 3564 | "kind": "function", |
3565 | | - "line": 487, |
| 3565 | + "line": 503, |
3566 | 3566 | "metadata": { |
3567 | 3567 | "api_refs": [ |
3568 | 3568 | "policyengine_us_data.datasets.cps.enhanced_cps.reweight" |
|
3697 | 3697 | "docstring": "Run structural integrity checks on an H5 file.\n\nArgs:\n h5_path: Path to the H5 dataset file.\n period: Tax year (used for variable keys).\n\nReturns:\n List of {check, status, detail} dicts.", |
3698 | 3698 | "id": "sanity_checks", |
3699 | 3699 | "kind": "function", |
3700 | | - "line": 331, |
| 3700 | + "line": 329, |
3701 | 3701 | "metadata": { |
3702 | 3702 | "api_refs": [ |
3703 | 3703 | "policyengine_us_data.calibration.sanity_checks.run_sanity_checks" |
|
3724 | 3724 | "docstring": "Impute net_worth and auto_loan from SCF.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
3725 | 3725 | "id": "scf_qrf", |
3726 | 3726 | "kind": "function", |
3727 | | - "line": 1108, |
| 3727 | + "line": 1113, |
3728 | 3728 | "metadata": { |
3729 | 3729 | "api_refs": [ |
3730 | 3730 | "policyengine_us_data.calibration.source_impute._impute_scf" |
|
3778 | 3778 | "docstring": "Impute tip_income, liquid assets, and vehicle signals from SIPP.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
3779 | 3779 | "id": "sipp_qrf", |
3780 | 3780 | "kind": "function", |
3781 | | - "line": 649, |
| 3781 | + "line": 650, |
3782 | 3782 | "metadata": { |
3783 | 3783 | "api_refs": [ |
3784 | 3784 | "policyengine_us_data.calibration.source_impute._impute_sipp" |
|
3805 | 3805 | "docstring": "Re-impute ACS/SIPP/ORG/SCF variables from donor surveys.\n\nOverwrites existing imputed values in data. ACS uses\nstate_fips as a QRF predictor; ORG uses state plus labor-market\npredictors; SIPP and SCF use only demographic and financial\npredictors (no state data).\n\nArgs:\n data: CPS dataset dict {variable: {time_period: array}}.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n skip_acs: Skip ACS imputation.\n skip_sipp: Skip SIPP imputation.\n skip_org: Skip ORG imputation.\n skip_scf: Skip SCF imputation.\n\nReturns:\n Updated data dict with re-imputed variables.", |
3806 | 3806 | "id": "source_impute", |
3807 | 3807 | "kind": "function", |
3808 | | - "line": 219, |
| 3808 | + "line": 220, |
3809 | 3809 | "metadata": { |
3810 | 3810 | "api_refs": [ |
3811 | 3811 | "policyengine_us_data.calibration.source_impute.impute_source_variables" |
|
0 commit comments