|
34 | 34 | "docstring": "Impute rent and real_estate_taxes from ACS with state.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
35 | 35 | "id": "acs_qrf", |
36 | 36 | "kind": "function", |
37 | | - "line": 490, |
| 37 | + "line": 507, |
38 | 38 | "metadata": { |
39 | 39 | "api_refs": [ |
40 | 40 | "policyengine_us_data.calibration.source_impute._impute_acs" |
|
61 | 61 | "docstring": "\"Add auto loan balance, interest and net_worth variable.", |
62 | 62 | "id": "add_auto_loan", |
63 | 63 | "kind": "function", |
64 | | - "line": 2904, |
| 64 | + "line": 2938, |
65 | 65 | "metadata": { |
66 | 66 | "api_refs": [ |
67 | 67 | "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth" |
|
88 | 88 | "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n cps: Output CPS H5 group receiving derived household variables.\n household: Raw CPS household table.", |
89 | 89 | "id": "add_household_variables", |
90 | 90 | "kind": "function", |
91 | | - "line": 1485, |
| 91 | + "line": 1519, |
92 | 92 | "metadata": { |
93 | 93 | "api_refs": [ |
94 | 94 | "policyengine_us_data.datasets.cps.cps.add_household_variables" |
|
115 | 115 | "docstring": "Add basic ID and weight variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The person table of the ASEC.\n tax_unit (DataFrame): The tax unit table created from the person table\n of the ASEC.\n family (DataFrame): The family table of the ASEC.\n spm_unit (DataFrame): The SPM unit table created from the person table\n of the ASEC.\n household (DataFrame): The household table of the ASEC.", |
116 | 116 | "id": "add_id_variables", |
117 | 117 | "kind": "function", |
118 | | - "line": 954, |
| 118 | + "line": 988, |
119 | 119 | "metadata": { |
120 | 120 | "api_refs": [ |
121 | 121 | "policyengine_us_data.datasets.cps.cps.add_id_variables" |
|
142 | 142 | "docstring": "Impute ORG-derived wage and union inputs onto CPS persons.", |
143 | 143 | "id": "add_org_inputs", |
144 | 144 | "kind": "function", |
145 | | - "line": 2788, |
| 145 | + "line": 2822, |
146 | 146 | "metadata": { |
147 | 147 | "api_refs": [ |
148 | 148 | "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs" |
|
169 | 169 | "docstring": "Add income variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.\n year (int): The CPS year", |
170 | 170 | "id": "add_personal_income_variables", |
171 | 171 | "kind": "function", |
172 | | - "line": 1160, |
| 172 | + "line": 1194, |
173 | 173 | "metadata": { |
174 | 174 | "api_refs": [ |
175 | 175 | "policyengine_us_data.datasets.cps.cps.add_personal_income_variables" |
|
196 | 196 | "docstring": "Add personal demographic variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.", |
197 | 197 | "id": "add_personal_variables", |
198 | 198 | "kind": "function", |
199 | | - "line": 1016, |
| 199 | + "line": 1050, |
200 | 200 | "metadata": { |
201 | 201 | "api_refs": [ |
202 | 202 | "policyengine_us_data.datasets.cps.cps.add_personal_variables" |
|
223 | 223 | "docstring": "", |
224 | 224 | "id": "add_previous_year_income", |
225 | 225 | "kind": "function", |
226 | | - "line": 1527, |
| 226 | + "line": 1561, |
227 | 227 | "metadata": { |
228 | 228 | "api_refs": [ |
229 | 229 | "policyengine_us_data.datasets.cps.cps.add_previous_year_income" |
|
250 | 250 | "docstring": "", |
251 | 251 | "id": "add_rent", |
252 | 252 | "kind": "function", |
253 | | - "line": 352, |
| 253 | + "line": 362, |
254 | 254 | "metadata": { |
255 | 255 | "api_refs": [ |
256 | 256 | "policyengine_us_data.datasets.cps.cps.add_rent" |
|
277 | 277 | "docstring": "", |
278 | 278 | "id": "add_spm_variables", |
279 | 279 | "kind": "function", |
280 | | - "line": 1446, |
| 280 | + "line": 1480, |
281 | 281 | "metadata": { |
282 | 282 | "api_refs": [ |
283 | 283 | "policyengine_us_data.datasets.cps.cps.add_spm_variables" |
|
304 | 304 | "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status", |
305 | 305 | "id": "add_ssn_card_type", |
306 | 306 | "kind": "function", |
307 | | - "line": 1633, |
| 307 | + "line": 1667, |
308 | 308 | "metadata": { |
309 | 309 | "api_refs": [ |
310 | 310 | "policyengine_us_data.datasets.cps.cps.add_ssn_card_type" |
|
331 | 331 | "docstring": "", |
332 | 332 | "id": "add_takeup", |
333 | 333 | "kind": "function", |
334 | | - "line": 476, |
| 334 | + "line": 510, |
335 | 335 | "metadata": { |
336 | 336 | "api_refs": [ |
337 | 337 | "policyengine_us_data.datasets.cps.cps.add_takeup" |
|
358 | 358 | "docstring": "", |
359 | 359 | "id": "add_tips", |
360 | 360 | "kind": "function", |
361 | | - "line": 2532, |
| 361 | + "line": 2566, |
362 | 362 | "metadata": { |
363 | 363 | "api_refs": [ |
364 | 364 | "policyengine_us_data.datasets.cps.cps.add_tips" |
|
1064 | 1064 | "docstring": "Subsample the loaded CPS dataset and preserve downsampled arrays.\n\nArgs:\n frac: Fraction of records to retain.", |
1065 | 1065 | "id": "downsample", |
1066 | 1066 | "kind": "function", |
1067 | | - "line": 319, |
| 1067 | + "line": 329, |
1068 | 1068 | "metadata": { |
1069 | 1069 | "api_refs": [ |
1070 | 1070 | "policyengine_us_data.datasets.cps.cps.CPS.downsample" |
|
3087 | 3087 | "docstring": "Run QRF imputation for PUF variables.\n\nStratified-subsamples PUF records (top 0.5% by AGI kept,\nrest randomly sampled to ~20K total), trains QRF, and\npredicts on CPS data.\n\nArgs:\n data: CPS data dict.\n time_period: Tax year.\n puf_dataset: PUF dataset class or path.\n dataset_path: Path to CPS h5 for computing\n demographic predictors via Microsimulation.\n\nReturns:\n Tuple of (y_full_imputations, y_override_imputations)\n as dicts of {variable: np.ndarray}.", |
3088 | 3088 | "id": "puf_qrf_pass", |
3089 | 3089 | "kind": "function", |
3090 | | - "line": 891, |
| 3090 | + "line": 914, |
3091 | 3091 | "metadata": { |
3092 | 3092 | "api_refs": [ |
3093 | 3093 | "policyengine_us_data.calibration.puf_impute._run_qrf_imputation" |
|
3145 | 3145 | "docstring": "Clone CPS data 2x and impute PUF variables on one half.\n\nThe first half keeps CPS values when CPS reports the variable.\nVariables absent from CPS get PUF QRF predictions on both halves\nso positive-weight CPS rows can support those calibration targets.\nThe second half still gets full PUF QRF imputation and starts with\nhousehold weights set to zero.\n\nArgs:\n data: CPS dataset dict {variable: {time_period: array}}.\n state_fips: State FIPS per household, shape (n_households,).\n block_geoid: Optional 15-character Census block GEOID per household.\n cd_geoid: Optional congressional district GEOID per household.\n county_fips: Optional 5-digit county FIPS per household.\n time_period: Tax year.\n puf_dataset: PUF dataset class or path for QRF training.\n If None, skips QRF (same as skip_qrf=True).\n skip_qrf: If True, skip QRF imputation (for testing).\n dataset_path: Path to CPS h5 file (needed for QRF to\n compute demographic predictors via Microsimulation).\n\nReturns:\n New data dict with doubled records.", |
3146 | 3146 | "id": "record_double", |
3147 | 3147 | "kind": "function", |
3148 | | - "line": 452, |
| 3148 | + "line": 475, |
3149 | 3149 | "metadata": { |
3150 | 3150 | "api_refs": [ |
3151 | 3151 | "policyengine_us_data.calibration.puf_impute.puf_clone_dataset" |
|
3345 | 3345 | "docstring": "Impute retirement contributions for the PUF half using QRF.\n\nTrains on CPS data (which has realistic income-to-contribution\nrelationships) and predicts onto PUF clone records using\nPUF-imputed income as input features.\n\nNote: ``pre_tax_contributions`` is separately imputed from PUF\nvia OVERRIDDEN_IMPUTED_VARIABLES. In PolicyEngine it is a\nformula (``adds`` of traditional_401k + traditional_403b + \u2026),\nso the stored value is only used when the formula is bypassed.\nA future improvement could reconcile or drop the stored\npre_tax_contributions in favour of the formula sum.\n\nArgs:\n data: CPS data dict.\n puf_imputations: Dict of PUF-imputed variable arrays.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Dict mapping retirement variable names to imputed arrays.\n Returns all-zeros on QRF failure.", |
3346 | 3346 | "id": "retire_impute", |
3347 | 3347 | "kind": "function", |
3348 | | - "line": 746, |
| 3348 | + "line": 769, |
3349 | 3349 | "metadata": { |
3350 | 3350 | "api_refs": [ |
3351 | 3351 | "policyengine_us_data.calibration.puf_impute._impute_retirement_contributions" |
|
3534 | 3534 | "docstring": "Impute net_worth and auto_loan from SCF.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
3535 | 3535 | "id": "scf_qrf", |
3536 | 3536 | "kind": "function", |
3537 | | - "line": 1004, |
| 3537 | + "line": 1073, |
3538 | 3538 | "metadata": { |
3539 | 3539 | "api_refs": [ |
3540 | 3540 | "policyengine_us_data.calibration.source_impute._impute_scf" |
|
3588 | 3588 | "docstring": "Impute tip_income, liquid assets, and vehicle signals from SIPP.\n\nArgs:\n data: CPS data dict.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Updated data dict.", |
3589 | 3589 | "id": "sipp_qrf", |
3590 | 3590 | "kind": "function", |
3591 | | - "line": 591, |
| 3591 | + "line": 632, |
3592 | 3592 | "metadata": { |
3593 | 3593 | "api_refs": [ |
3594 | 3594 | "policyengine_us_data.calibration.source_impute._impute_sipp" |
|
3615 | 3615 | "docstring": "Re-impute ACS/SIPP/ORG/SCF variables from donor surveys.\n\nOverwrites existing imputed values in data. ACS uses\nstate_fips as a QRF predictor; ORG uses state plus labor-market\npredictors; SIPP and SCF use only demographic and financial\npredictors (no state data).\n\nArgs:\n data: CPS dataset dict {variable: {time_period: array}}.\n state_fips: State FIPS per household.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n skip_acs: Skip ACS imputation.\n skip_sipp: Skip SIPP imputation.\n skip_org: Skip ORG imputation.\n skip_scf: Skip SCF imputation.\n\nReturns:\n Updated data dict with re-imputed variables.", |
3616 | 3616 | "id": "source_impute", |
3617 | 3617 | "kind": "function", |
3618 | | - "line": 185, |
| 3618 | + "line": 202, |
3619 | 3619 | "metadata": { |
3620 | 3620 | "api_refs": [ |
3621 | 3621 | "policyengine_us_data.calibration.source_impute.impute_source_variables" |
|
3648 | 3648 | "docstring": "Predict SS sub-components for PUF half from demographics.\n\nThe CPS-PUF link is statistical (not identity-based), so the\npaired CPS record's sub-component split is just one noisy draw.\nA QRF trained on all CPS SS recipients gives a better expected\nprediction by pooling across the full training set.\n\nFor all PUF records with positive social_security, this function\npredicts shares via QRF (falling back to an age heuristic) and\nscales them to match the imputed total. PUF records with zero\nSS get all sub-components cleared to zero.\n\nModifies ``data`` in place. Only the PUF half (indices\nn_cps .. 2*n_cps) is changed.\n\nArgs:\n data: Dataset dict {variable: {time_period: array}}.\n n_cps: Number of records in the CPS half.\n time_period: Tax year key into data dicts.", |
3649 | 3649 | "id": "ss_reconcile", |
3650 | 3650 | "kind": "function", |
3651 | | - "line": 384, |
| 3651 | + "line": 407, |
3652 | 3652 | "metadata": { |
3653 | 3653 | "api_refs": [ |
3654 | 3654 | "policyengine_us_data.calibration.puf_impute.reconcile_ss_subcomponents" |
|
4146 | 4146 | "docstring": "Impute weeks_unemployed for the PUF half using QRF.\n\nUses CPS as training data and imputed PUF demographics as\ntest data, preserving the joint distribution of weeks with\nunemployment compensation.\n\nArgs:\n data: CPS data dict.\n puf_imputations: Dict of PUF-imputed variable arrays.\n time_period: Tax year.\n dataset_path: Path to CPS h5 for Microsimulation.\n\nReturns:\n Array of imputed weeks for PUF half.", |
4147 | 4147 | "id": "weeks_impute", |
4148 | 4148 | "kind": "function", |
4149 | | - "line": 638, |
| 4149 | + "line": 661, |
4150 | 4150 | "metadata": { |
4151 | 4151 | "api_refs": [ |
4152 | 4152 | "policyengine_us_data.calibration.puf_impute._impute_weeks_unemployed" |
|
0 commit comments