|
14 | 14 | ) |
15 | 15 | from microimpute.models.qrf import QRF |
16 | 16 | import logging |
| 17 | +from policyengine_us_data.parameters import load_take_up_rate |
| 18 | +from policyengine_us_data.utils.randomness import seeded_rng |
17 | 19 |
|
18 | 20 |
|
19 | 21 | class CPS(Dataset): |
@@ -191,28 +193,101 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame): |
def add_takeup(self):
    """Impute boolean take-up / eligibility flags and save them to the dataset.

    Loads the stored dataset, builds a baseline ``Microsimulation`` on it,
    and for each flag compares a uniform random draw per entity against a
    rate loaded with ``load_take_up_rate``. The resulting boolean arrays
    are written into the dataset under these keys, then the dataset is
    saved:

    * tax-unit level: ``takes_up_eitc``, ``takes_up_dc_ptc``,
      ``takes_up_aca_if_eligible``
    * SPM-unit level: ``takes_up_snap_if_eligible``
    * person level: ``takes_up_medicaid_if_eligible``,
      ``takes_up_head_start_if_eligible``,
      ``takes_up_early_head_start_if_eligible``,
      ``meets_ssi_resource_test``, ``would_claim_wic``,
      ``is_wic_at_nutritional_risk``

    Each flag takes its generator from ``seeded_rng(<flag name>)``.
    NOTE(review): presumably that yields a deterministic stream per name,
    so flags do not depend on the order they are drawn in -- confirm in
    ``policyengine_us_data.utils.randomness``.
    """
    data = self.load_dataset()

    from policyengine_us import Microsimulation

    baseline = Microsimulation(dataset=self)
    period = self.time_period

    n_persons = len(data["person_id"])
    n_tax_units = len(data["tax_unit_id"])
    n_spm_units = len(data["spm_unit_id"])

    # Fallbacks applied when a loaded rate table has no entry for a key.
    # NOTE(review): these silently mask missing parameters -- confirm the
    # values are intended rather than placeholders.
    default_eitc_rate = 0.85
    default_state = "CA"
    default_medicaid_rate = 0.93
    default_wic_rate = 0

    def draw(variable, size, rate):
        """Return ``size`` booleans: uniform draw below ``rate``."""
        return seeded_rng(variable).random(size) < rate

    def rates_for(table, keys, default):
        """Look up each key in ``table``, using ``default`` when absent."""
        return np.array([table.get(key, default) for key in keys])

    # Load take-up rates
    eitc_rates_by_children = load_take_up_rate("eitc", period)
    dc_ptc_rate = load_take_up_rate("dc_ptc", period)
    snap_rate = load_take_up_rate("snap", period)
    aca_rate = load_take_up_rate("aca", period)
    medicaid_rates_by_state = load_take_up_rate("medicaid", period)
    head_start_rate = load_take_up_rate("head_start", period)
    early_head_start_rate = load_take_up_rate("early_head_start", period)
    ssi_pass_rate = load_take_up_rate("ssi_pass_rate", period)

    # EITC: rate varies by number of qualifying children, capped at 3
    # for the table lookup.
    eitc_child_count = baseline.calculate("eitc_child_count").values
    eitc_takeup_rate = rates_for(
        eitc_rates_by_children,
        (min(int(count), 3) for count in eitc_child_count),
        default_eitc_rate,
    )
    data["takes_up_eitc"] = draw(
        "takes_up_eitc", n_tax_units, eitc_takeup_rate
    )

    # DC Property Tax Credit
    data["takes_up_dc_ptc"] = draw(
        "takes_up_dc_ptc", n_tax_units, dc_ptc_rate
    )

    # SNAP
    data["takes_up_snap_if_eligible"] = draw(
        "takes_up_snap_if_eligible", n_spm_units, snap_rate
    )

    # ACA
    data["takes_up_aca_if_eligible"] = draw(
        "takes_up_aca_if_eligible", n_tax_units, aca_rate
    )

    # Medicaid: state-specific rates. State codes are paired with
    # household ids and then spread to persons via each person's
    # household id.
    # NOTE(review): assumes "state_code_str" is returned in the same
    # order as data["household_id"] -- confirm.
    state_codes = baseline.calculate("state_code_str").values
    hh_to_state = dict(zip(data["household_id"], state_codes))
    person_states = rates_for(
        hh_to_state, data["person_household_id"], default_state
    )
    medicaid_rate_by_person = rates_for(
        medicaid_rates_by_state, person_states, default_medicaid_rate
    )
    data["takes_up_medicaid_if_eligible"] = draw(
        "takes_up_medicaid_if_eligible", n_persons, medicaid_rate_by_person
    )

    # Head Start
    data["takes_up_head_start_if_eligible"] = draw(
        "takes_up_head_start_if_eligible", n_persons, head_start_rate
    )

    # Early Head Start
    data["takes_up_early_head_start_if_eligible"] = draw(
        "takes_up_early_head_start_if_eligible",
        n_persons,
        early_head_start_rate,
    )

    # SSI resource test
    data["meets_ssi_resource_test"] = draw(
        "meets_ssi_resource_test", n_persons, ssi_pass_rate
    )

    # WIC: resolve draws to bools using category-specific rates
    wic_categories = baseline.calculate("wic_category_str").values
    wic_takeup_rates = load_take_up_rate("wic_takeup", period)
    wic_takeup_rate_by_person = rates_for(
        wic_takeup_rates, wic_categories, default_wic_rate
    )
    data["would_claim_wic"] = draw(
        "would_claim_wic", n_persons, wic_takeup_rate_by_person
    )

    # WIC nutritional risk: anyone the baseline reports as receiving WIC
    # is flagged at risk; everyone else gets a category-rate draw.
    wic_risk_rates = load_take_up_rate("wic_nutritional_risk", period)
    wic_risk_rate_by_person = rates_for(
        wic_risk_rates, wic_categories, default_wic_rate
    )
    receives_wic = baseline.calculate("receives_wic").values
    imputed_risk = draw(
        "is_wic_at_nutritional_risk", n_persons, wic_risk_rate_by_person
    )
    data["is_wic_at_nutritional_risk"] = receives_wic | imputed_risk

    self.save_dataset(data)
218 | 293 |
|
|
0 commit comments