|
61 | 61 | "docstring": "\"Add auto loan balance, interest and net_worth variable.", |
62 | 62 | "id": "add_auto_loan", |
63 | 63 | "kind": "function", |
64 | | - "line": 2951, |
| 64 | + "line": 3074, |
65 | 65 | "metadata": { |
66 | 66 | "api_refs": [ |
67 | 67 | "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth" |
|
88 | 88 | "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n cps: Output CPS H5 group receiving derived household variables.\n household: Raw CPS household table.", |
89 | 89 | "id": "add_household_variables", |
90 | 90 | "kind": "function", |
91 | | - "line": 1531, |
| 91 | + "line": 1667, |
92 | 92 | "metadata": { |
93 | 93 | "api_refs": [ |
94 | 94 | "policyengine_us_data.datasets.cps.cps.add_household_variables" |
|
115 | 115 | "docstring": "Add basic ID and weight variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The person table of the ASEC.\n tax_unit (DataFrame): The tax unit table created from the person table\n of the ASEC.\n family (DataFrame): The family table of the ASEC.\n spm_unit (DataFrame): The SPM unit table created from the person table\n of the ASEC.\n household (DataFrame): The household table of the ASEC.", |
116 | 116 | "id": "add_id_variables", |
117 | 117 | "kind": "function", |
118 | | - "line": 997, |
| 118 | + "line": 1043, |
119 | 119 | "metadata": { |
120 | 120 | "api_refs": [ |
121 | 121 | "policyengine_us_data.datasets.cps.cps.add_id_variables" |
|
139 | 139 | "source_file": "policyengine_us_data/datasets/cps/cps.py" |
140 | 140 | }, |
141 | 141 | "add_org_inputs": { |
142 | | - "docstring": "Impute ORG-derived wage and union inputs onto CPS persons.", |
| 142 | + "docstring": "Impute ORG-derived labor-market inputs and derive overtime premium.", |
143 | 143 | "id": "add_org_inputs", |
144 | 144 | "kind": "function", |
145 | | - "line": 2835, |
| 145 | + "line": 2974, |
146 | 146 | "metadata": { |
147 | 147 | "api_refs": [ |
148 | 148 | "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs" |
149 | 149 | ], |
150 | | - "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors.", |
| 150 | + "description": "Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors, then derive FLSA overtime premium.", |
151 | 151 | "id": "add_org_inputs", |
152 | 152 | "label": "ORG Labor-Market Inputs", |
153 | 153 | "node_type": "library", |
|
162 | 162 | ] |
163 | 163 | }, |
164 | 164 | "object_path": "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs", |
165 | | - "signature": "def add_org_labor_market_inputs(cps: h5py.File) -> None", |
| 165 | + "signature": "def add_org_labor_market_inputs(cps: h5py.File, time_period: int) -> None", |
166 | 166 | "source_file": "policyengine_us_data/datasets/cps/cps.py" |
167 | 167 | }, |
168 | 168 | "add_personal_income_variables": { |
169 | 169 | "docstring": "Add income variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.\n year (int): The CPS year", |
170 | 170 | "id": "add_personal_income_variables", |
171 | 171 | "kind": "function", |
172 | | - "line": 1206, |
| 172 | + "line": 1342, |
173 | 173 | "metadata": { |
174 | 174 | "api_refs": [ |
175 | 175 | "policyengine_us_data.datasets.cps.cps.add_personal_income_variables" |
|
196 | 196 | "docstring": "Add personal demographic variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.", |
197 | 197 | "id": "add_personal_variables", |
198 | 198 | "kind": "function", |
199 | | - "line": 1059, |
| 199 | + "line": 1105, |
200 | 200 | "metadata": { |
201 | 201 | "api_refs": [ |
202 | 202 | "policyengine_us_data.datasets.cps.cps.add_personal_variables" |
|
223 | 223 | "docstring": "", |
224 | 224 | "id": "add_previous_year_income", |
225 | 225 | "kind": "function", |
226 | | - "line": 1573, |
| 226 | + "line": 1709, |
227 | 227 | "metadata": { |
228 | 228 | "api_refs": [ |
229 | 229 | "policyengine_us_data.datasets.cps.cps.add_previous_year_income" |
|
250 | 250 | "docstring": "", |
251 | 251 | "id": "add_rent", |
252 | 252 | "kind": "function", |
253 | | - "line": 371, |
| 253 | + "line": 417, |
254 | 254 | "metadata": { |
255 | 255 | "api_refs": [ |
256 | 256 | "policyengine_us_data.datasets.cps.cps.add_rent" |
|
277 | 277 | "docstring": "", |
278 | 278 | "id": "add_spm_variables", |
279 | 279 | "kind": "function", |
280 | | - "line": 1492, |
| 280 | + "line": 1628, |
281 | 281 | "metadata": { |
282 | 282 | "api_refs": [ |
283 | 283 | "policyengine_us_data.datasets.cps.cps.add_spm_variables" |
|
304 | 304 | "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status", |
305 | 305 | "id": "add_ssn_card_type", |
306 | 306 | "kind": "function", |
307 | | - "line": 1679, |
| 307 | + "line": 1815, |
308 | 308 | "metadata": { |
309 | 309 | "api_refs": [ |
310 | 310 | "policyengine_us_data.datasets.cps.cps.add_ssn_card_type" |
|
331 | 331 | "docstring": "", |
332 | 332 | "id": "add_takeup", |
333 | 333 | "kind": "function", |
334 | | - "line": 519, |
| 334 | + "line": 565, |
335 | 335 | "metadata": { |
336 | 336 | "api_refs": [ |
337 | 337 | "policyengine_us_data.datasets.cps.cps.add_takeup" |
|
358 | 358 | "docstring": "", |
359 | 359 | "id": "add_tips", |
360 | 360 | "kind": "function", |
361 | | - "line": 2578, |
| 361 | + "line": 2714, |
362 | 362 | "metadata": { |
363 | 363 | "api_refs": [ |
364 | 364 | "policyengine_us_data.datasets.cps.cps.add_tips" |
|
815 | 815 | "docstring": "Replace clone-half person-level feature variables with donor matches.", |
816 | 816 | "id": "clone_features", |
817 | 817 | "kind": "function", |
818 | | - "line": 585, |
| 818 | + "line": 604, |
819 | 819 | "metadata": { |
820 | 820 | "api_refs": [ |
821 | 821 | "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions" |
|
878 | 878 | "docstring": "Assert that final exported variables are leaf inputs.", |
879 | 879 | "id": "computed_export_contract", |
880 | 880 | "kind": "function", |
881 | | - "line": 1775, |
| 881 | + "line": 1795, |
882 | 882 | "metadata": { |
883 | 883 | "api_refs": [ |
884 | 884 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._assert_no_computed_variables_exported" |
|
972 | 972 | "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).", |
973 | 973 | "id": "cps_only", |
974 | 974 | "kind": "function", |
975 | | - "line": 624, |
| 975 | + "line": 643, |
976 | 976 | "metadata": { |
977 | 977 | "api_refs": [ |
978 | 978 | "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables" |
|
1064 | 1064 | "docstring": "Subsample the loaded CPS dataset and preserve downsampled arrays.\n\nArgs:\n frac: Fraction of records to retain.", |
1065 | 1065 | "id": "downsample", |
1066 | 1066 | "kind": "function", |
1067 | | - "line": 338, |
| 1067 | + "line": 384, |
1068 | 1068 | "metadata": { |
1069 | 1069 | "api_refs": [ |
1070 | 1070 | "policyengine_us_data.datasets.cps.cps.CPS.downsample" |
|
1325 | 1325 | "docstring": "Check formula-reconstructed housing assistance before export.\n\nThe final H5 must not export formula outputs such as ``housing_assistance``.\nThis guard verifies that the remaining leaf inputs still make those\nformulas produce nonzero values before the export contract strips or\nrejects computed variables.", |
1326 | 1326 | "id": "housing_assistance_microsim_validation", |
1327 | 1327 | "kind": "function", |
1328 | | - "line": 1545, |
| 1328 | + "line": 1565, |
1329 | 1329 | "metadata": { |
1330 | 1330 | "api_refs": [ |
1331 | 1331 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._validate_housing_assistance_microsimulation" |
|
3216 | 3216 | "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.", |
3217 | 3217 | "id": "qrf_pass2", |
3218 | 3218 | "kind": "function", |
3219 | | - "line": 1015, |
| 3219 | + "line": 1034, |
3220 | 3220 | "metadata": { |
3221 | 3221 | "api_refs": [ |
3222 | 3222 | "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions" |
|
0 commit comments