|
50 | 50 | from policyengine_us_data.utils.asset_imputation import ( |
51 | 51 | build_household_vehicle_receiver, |
52 | 52 | ) |
53 | | -from policyengine_us_data.utils.policyengine import ( |
54 | | - supports_medicare_enrollment_input, |
55 | | - supports_modeled_medicare_part_b_inputs, |
56 | | -) |
57 | | - |
58 | 53 |
|
59 | 54 | CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = { |
60 | 55 | "reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR", |
@@ -193,6 +188,8 @@ def generate(self): |
193 | 188 | add_takeup(self) |
194 | 189 | logging.info("Imputing Marketplace plan benchmark ratio") |
195 | 190 | add_marketplace_plan_benchmark_ratio(self) |
| 191 | + logging.info("Deriving other health insurance premiums") |
| 192 | + derive_other_health_insurance_premiums(self) |
196 | 193 | logging.info("Downsampling") |
197 | 194 |
|
198 | 195 | # Downsample |
@@ -519,6 +516,124 @@ def add_marketplace_plan_benchmark_ratio(self): |
519 | 516 | self.save_dataset(data) |
520 | 517 |
|
521 | 518 |
|
# Configuration for deriving "other" premium inputs. Each key is the output
# variable to create; its value names the reported source variable and the
# modeled premium variables whose baseline values are subtracted from it.
OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS = {
    "other_health_insurance_premiums": {
        # Reported premium column the residual is taken from.
        "reported_variable": "health_insurance_premiums_without_medicare_part_b",
        # Premiums computed by the baseline simulation and netted out.
        "modeled_variables": (
            "chip_premium",
            "marketplace_net_premium",
            "medicaid_premium",
        ),
    },
}
| 529 | + |
| 530 | + |
def derive_other_health_insurance_premiums(self):
    """Derive residual premium inputs by netting out baseline modeled premiums.

    For every entry in ``OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS`` whose
    reported variable exists in the loaded dataset, this:

    1. calculates each configured modeled premium variable with a baseline
       ``Microsimulation`` built on this dataset,
    2. maps each result onto person rows via ``_premium_values_to_person``,
    3. stores ``reported - sum(modeled)`` under the target variable name.

    The reported source variable itself is never overwritten. Targets whose
    reported variable is absent from the dataset are skipped, and the dataset
    is saved only when at least one target was written.

    No fallback exists for modeled variables that the installed
    policyengine-us does not define; the calculation/lookup is expected to
    raise in that case rather than yield a partial decomposition
    (NOTE(review): exact exception depends on policyengine-us — confirm).
    """
    from policyengine_us import Microsimulation

    data = self.load_dataset()
    simulation = Microsimulation(dataset=self)
    tax_benefit_system = simulation.tax_benefit_system
    period = self.time_period
    wrote_any_target = False

    for target_name, spec in OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS.items():
        source_name = spec["reported_variable"]
        modeled_names = spec["modeled_variables"]

        # Nothing to decompose when the reported input is not in the dataset.
        if source_name not in data:
            continue

        # Person-level running total of all baseline modeled premiums.
        modeled_total = np.zeros(len(data[source_name]), dtype=float)
        for modeled_name in modeled_names:
            # Calculate first, then look up the entity, matching the order in
            # which a missing variable would surface an error.
            modeled_values = np.asarray(
                simulation.calculate(modeled_name, period=period).values,
                dtype=float,
            )
            entity_key = tax_benefit_system.variables[modeled_name].entity.key
            modeled_total += _premium_values_to_person(
                data=data,
                source_entity=entity_key,
                values=modeled_values,
            )

        data[target_name] = compute_other_health_insurance_premiums(
            reported_premium=data[source_name],
            baseline_computed_premium=modeled_total,
        )
        logging.info(
            "Created %s from %s by subtracting baseline computed premiums: %s",
            target_name,
            source_name,
            ", ".join(modeled_names),
        )
        wrote_any_target = True

    # Avoid rewriting the dataset when no target could be derived.
    if wrote_any_target:
        self.save_dataset(data)
| 583 | + |
| 584 | + |
def compute_other_health_insurance_premiums(
    reported_premium: np.ndarray,
    baseline_computed_premium: np.ndarray,
) -> np.ndarray:
    """Subtract baseline computed premiums from reported premiums.

    Both inputs are coerced to float arrays before the element-wise
    subtraction. The difference is returned as-is: it is not clipped, so
    entries where the computed premium exceeds the reported one are negative.
    """
    reported = np.asarray(reported_premium, dtype=float)
    computed = np.asarray(baseline_computed_premium, dtype=float)
    return reported - computed
| 593 | + |
| 594 | + |
| 595 | +def _premium_values_to_person( |
| 596 | + data: dict, |
| 597 | + source_entity: str, |
| 598 | + values: np.ndarray, |
| 599 | +) -> np.ndarray: |
| 600 | + """Map computed premiums to person rows for person-level premium accounting.""" |
| 601 | + person_ids = data["person_id"] |
| 602 | + if source_entity == "person": |
| 603 | + if len(values) != len(person_ids): |
| 604 | + raise ValueError( |
| 605 | + "Person-level computed premium length does not match person rows: " |
| 606 | + f"got {len(values)}, expected {len(person_ids)}." |
| 607 | + ) |
| 608 | + return values |
| 609 | + |
| 610 | + entity_id_variable = f"{source_entity}_id" |
| 611 | + person_entity_id_variable = f"person_{source_entity}_id" |
| 612 | + if entity_id_variable not in data or person_entity_id_variable not in data: |
| 613 | + raise ValueError( |
| 614 | + f"Cannot allocate {source_entity}-level premiums to people: missing " |
| 615 | + f"{entity_id_variable} or {person_entity_id_variable}." |
| 616 | + ) |
| 617 | + |
| 618 | + entity_ids = data[entity_id_variable] |
| 619 | + person_entity_ids = data[person_entity_id_variable] |
| 620 | + if len(values) != len(entity_ids): |
| 621 | + raise ValueError( |
| 622 | + f"{source_entity}-level computed premium length does not match " |
| 623 | + f"{source_entity} rows: got {len(values)}, expected {len(entity_ids)}." |
| 624 | + ) |
| 625 | + |
| 626 | + entity_position = {entity_id: index for index, entity_id in enumerate(entity_ids)} |
| 627 | + allocated = np.zeros(len(person_ids), dtype=float) |
| 628 | + seen_entities = set() |
| 629 | + for person_index, entity_id in enumerate(person_entity_ids): |
| 630 | + if entity_id in seen_entities: |
| 631 | + continue |
| 632 | + allocated[person_index] = values[entity_position[entity_id]] |
| 633 | + seen_entities.add(entity_id) |
| 634 | + return allocated |
| 635 | + |
| 636 | + |
522 | 637 | MARKETPLACE_PLAN_BENCHMARK_RATIO_MIN = 0.5 |
523 | 638 | MARKETPLACE_PLAN_BENCHMARK_RATIO_MAX = 1.5 |
524 | 639 |
|
@@ -1009,12 +1124,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int): |
1009 | 1124 | cps["health_insurance_premiums_without_medicare_part_b"] = person.PHIP_VAL |
1010 | 1125 | cps["over_the_counter_health_expenses"] = person.POTC_VAL |
1011 | 1126 | cps["other_medical_expenses"] = person.PMED_VAL |
1012 | | - if supports_medicare_enrollment_input(): |
1013 | | - cps["medicare_enrolled"] = person.MCARE == 1 |
1014 | | - if supports_modeled_medicare_part_b_inputs(): |
1015 | | - cps["medicare_part_b_premiums_reported"] = person.PEMCPREM |
1016 | | - else: |
1017 | | - cps["medicare_part_b_premiums"] = person.PEMCPREM |
| 1127 | + cps["medicare_enrolled"] = person.MCARE == 1 |
1018 | 1128 |
|
1019 | 1129 | # Get QBI simulation parameters --- |
1020 | 1130 | yamlfilename = ( |
|
0 commit comments