Skip to content

Commit c9be05c

Browse files
Derive other health insurance premiums (#844)
* Residualize modeled health premiums
* Store residual health insurance premiums
* Rename other health insurance premium input
* Gate Part B target name on installed model
* Keep legacy Part B premium input for current model
* Emit decomposed premium input for current builds
* Require clean MOOP policyengine-us version
* Pin policyengine-us release for MOOP decomposition

---------

Co-authored-by: Max Ghenis <mghenis@gmail.com>
1 parent 46d95fa commit c9be05c

15 files changed

Lines changed: 294 additions & 75 deletions

File tree

changelog.d/8089.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added other health insurance premiums as the non-Medicare premium category not covered by modeled Marketplace, CHIP, or Medicaid premiums.

policyengine_us_data/calibration/target_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ include:
122122
geo_level: national
123123
- variable: medicaid
124124
geo_level: national
125-
- variable: medicare_part_b_premiums
125+
- variable: medicare_part_b_premium
126126
geo_level: national
127127
- variable: other_medical_expenses
128128
geo_level: national

policyengine_us_data/datasets/cps/cps.py

Lines changed: 121 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,6 @@
5050
from policyengine_us_data.utils.asset_imputation import (
5151
build_household_vehicle_receiver,
5252
)
53-
from policyengine_us_data.utils.policyengine import (
54-
supports_medicare_enrollment_input,
55-
supports_modeled_medicare_part_b_inputs,
56-
)
57-
5853

5954
CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = {
6055
"reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR",
@@ -193,6 +188,8 @@ def generate(self):
193188
add_takeup(self)
194189
logging.info("Imputing Marketplace plan benchmark ratio")
195190
add_marketplace_plan_benchmark_ratio(self)
191+
logging.info("Deriving other health insurance premiums")
192+
derive_other_health_insurance_premiums(self)
196193
logging.info("Downsampling")
197194

198195
# Downsample
@@ -519,6 +516,124 @@ def add_marketplace_plan_benchmark_ratio(self):
519516
self.save_dataset(data)
520517

521518

519+
# Residual premium outputs to derive, keyed by the output variable name.
# Each entry names the reported premium column the output is derived from
# ("reported_variable") and the modeled premium variables whose baseline
# values are subtracted from it ("modeled_variables"). Consumed by
# derive_other_health_insurance_premiums().
OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS = {
    "other_health_insurance_premiums": {
        "reported_variable": "health_insurance_premiums_without_medicare_part_b",
        "modeled_variables": (
            "chip_premium",
            "marketplace_net_premium",
            "medicaid_premium",
        ),
    },
}
529+
530+
531+
def derive_other_health_insurance_premiums(self):
    """Store other-premium inputs as reported premiums net of modeled premiums.

    For each entry in ``OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS`` the modeled
    premium variables are calculated on a baseline simulation of this
    dataset, mapped onto person rows, summed, and subtracted from the
    reported premium column. The residual is stored under the entry's key;
    the reported column itself is left unchanged as a raw source field.

    A target whose reported column is absent from the dataset is skipped,
    and a warning is logged so the gap is visible in the build log. The
    baseline simulation is only built once a target actually needs it, and
    the dataset is only re-saved when at least one output was derived.

    A modeled variable that the installed policyengine-us does not define is
    not handled here: the calculation/lookup error propagates, so the build
    fails fast instead of producing an incomplete decomposition.
    """
    from policyengine_us import Microsimulation

    data = self.load_dataset()
    period = self.time_period
    baseline = None  # built lazily: constructing a simulation is expensive
    changed = False

    for output_variable, config in OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS.items():
        reported_variable = config["reported_variable"]
        premium_variables = config["modeled_variables"]

        if reported_variable not in data:
            logging.warning(
                "Skipping %s: reported premium input %s is not in the dataset",
                output_variable,
                reported_variable,
            )
            continue

        if baseline is None:
            baseline = Microsimulation(dataset=self)
        model_variables = baseline.tax_benefit_system.variables

        # Accumulate every modeled premium on person rows so the total lines
        # up element-wise with the person-level reported premium.
        computed_premium = np.zeros(len(data[reported_variable]), dtype=float)
        for variable in premium_variables:
            values = np.asarray(
                baseline.calculate(variable, period=period).values,
                dtype=float,
            )
            computed_premium += _premium_values_to_person(
                data=data,
                source_entity=model_variables[variable].entity.key,
                values=values,
            )

        data[output_variable] = compute_other_health_insurance_premiums(
            reported_premium=data[reported_variable],
            baseline_computed_premium=computed_premium,
        )
        logging.info(
            "Created %s from %s by subtracting baseline computed premiums: %s",
            output_variable,
            reported_variable,
            ", ".join(premium_variables),
        )
        changed = True

    if changed:
        self.save_dataset(data)
583+
584+
585+
def compute_other_health_insurance_premiums(
    reported_premium: np.ndarray,
    baseline_computed_premium: np.ndarray,
) -> np.ndarray:
    """Return reported premiums minus baseline computed premiums.

    Both inputs are converted to float arrays and subtracted element-wise.
    No floor is applied, so an element is negative wherever the computed
    premium exceeds the reported one.
    """
    reported = np.asarray(reported_premium, dtype=float)
    computed = np.asarray(baseline_computed_premium, dtype=float)
    return reported - computed
593+
594+
595+
def _premium_values_to_person(
596+
data: dict,
597+
source_entity: str,
598+
values: np.ndarray,
599+
) -> np.ndarray:
600+
"""Map computed premiums to person rows for person-level premium accounting."""
601+
person_ids = data["person_id"]
602+
if source_entity == "person":
603+
if len(values) != len(person_ids):
604+
raise ValueError(
605+
"Person-level computed premium length does not match person rows: "
606+
f"got {len(values)}, expected {len(person_ids)}."
607+
)
608+
return values
609+
610+
entity_id_variable = f"{source_entity}_id"
611+
person_entity_id_variable = f"person_{source_entity}_id"
612+
if entity_id_variable not in data or person_entity_id_variable not in data:
613+
raise ValueError(
614+
f"Cannot allocate {source_entity}-level premiums to people: missing "
615+
f"{entity_id_variable} or {person_entity_id_variable}."
616+
)
617+
618+
entity_ids = data[entity_id_variable]
619+
person_entity_ids = data[person_entity_id_variable]
620+
if len(values) != len(entity_ids):
621+
raise ValueError(
622+
f"{source_entity}-level computed premium length does not match "
623+
f"{source_entity} rows: got {len(values)}, expected {len(entity_ids)}."
624+
)
625+
626+
entity_position = {entity_id: index for index, entity_id in enumerate(entity_ids)}
627+
allocated = np.zeros(len(person_ids), dtype=float)
628+
seen_entities = set()
629+
for person_index, entity_id in enumerate(person_entity_ids):
630+
if entity_id in seen_entities:
631+
continue
632+
allocated[person_index] = values[entity_position[entity_id]]
633+
seen_entities.add(entity_id)
634+
return allocated
635+
636+
522637
MARKETPLACE_PLAN_BENCHMARK_RATIO_MIN = 0.5
523638
MARKETPLACE_PLAN_BENCHMARK_RATIO_MAX = 1.5
524639

@@ -1009,12 +1124,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
10091124
cps["health_insurance_premiums_without_medicare_part_b"] = person.PHIP_VAL
10101125
cps["over_the_counter_health_expenses"] = person.POTC_VAL
10111126
cps["other_medical_expenses"] = person.PMED_VAL
1012-
if supports_medicare_enrollment_input():
1013-
cps["medicare_enrolled"] = person.MCARE == 1
1014-
if supports_modeled_medicare_part_b_inputs():
1015-
cps["medicare_part_b_premiums_reported"] = person.PEMCPREM
1016-
else:
1017-
cps["medicare_part_b_premiums"] = person.PEMCPREM
1127+
cps["medicare_enrolled"] = person.MCARE == 1
10181128

10191129
# Get QBI simulation parameters ---
10201130
yamlfilename = (

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
impute_tax_unit_mortgage_balance_hints,
2020
)
2121
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
22-
from policyengine_us_data.utils.policyengine import (
23-
supports_modeled_medicare_part_b_inputs,
24-
)
2522
from policyengine_us_data.utils.retirement_limits import (
2623
get_retirement_limits,
2724
get_se_pension_limits,
@@ -151,6 +148,7 @@ def _supports_structural_mortgage_inputs() -> bool:
151148
"spm_unit_pre_subsidy_childcare_expenses",
152149
# Medical expenses
153150
"health_insurance_premiums_without_medicare_part_b",
151+
"other_health_insurance_premiums",
154152
"over_the_counter_health_expenses",
155153
"other_medical_expenses",
156154
"child_support_expense",
@@ -166,9 +164,6 @@ def _supports_structural_mortgage_inputs() -> bool:
166164
"self_employment_income_last_year",
167165
]
168166

169-
if not supports_modeled_medicare_part_b_inputs():
170-
CPS_ONLY_IMPUTED_VARIABLES.append("medicare_part_b_premiums")
171-
172167
# Set for O(1) lookup in the splice loop.
173168
_CPS_ONLY_SET = set(CPS_ONLY_IMPUTED_VARIABLES)
174169

policyengine_us_data/datasets/puf/puf.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
STRUCTURAL_MORTGAGE_VARIABLES,
1818
convert_mortgage_interest_to_structural_inputs,
1919
)
20-
from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
20+
from policyengine_us_data.utils.policyengine import (
21+
has_policyengine_us_variables,
22+
)
2123
from policyengine_us_data.utils.uprating import (
2224
create_policyengine_uprating_factors_table,
2325
)
@@ -984,7 +986,7 @@ class PUF_2024(PUF):
984986
"health_insurance_premiums_without_medicare_part_b": 0.453,
985987
"other_medical_expenses": 0.325,
986988
"over_the_counter_health_expenses": 0.085,
987-
"medicare_part_b_premiums": 0.137,
989+
"medicare_part_b_premium": 0.137,
988990
}
989991

990992
if __name__ == "__main__":

policyengine_us_data/db/etl_national_targets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
155155
"year": 2024,
156156
},
157157
{
158-
"variable": "medicare_part_b_premiums",
158+
"variable": "medicare_part_b_premium",
159159
"value": get_beneficiary_paid_medicare_part_b_premiums_target(2024),
160160
"source": get_beneficiary_paid_medicare_part_b_premiums_source(2024),
161161
"notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024),

policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
HARD_CODED_TOTALS = {
1010
"health_insurance_premiums_without_medicare_part_b": 385e9,
1111
"other_medical_expenses": 278e9,
12-
"medicare_part_b_premiums": 112e9,
12+
"medicare_part_b_premium": 112e9,
1313
"over_the_counter_health_expenses": 72e9,
1414
"spm_unit_spm_threshold": 3_945e9,
1515
"child_support_expense": 33e9,

policyengine_us_data/utils/loss.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from policyengine_core.reforms import Reform
2020
from policyengine_us_data.utils.soi import pe_to_soi, get_soi
2121

22+
23+
MEDICARE_PART_B_PREMIUM_VARIABLE = "medicare_part_b_premium"
24+
2225
# National calibration targets consumed by build_loss_matrix().
2326
# These values are specific to 2024 — they should NOT be applied to
2427
# other years without re-sourcing. They are duplicated in
@@ -29,8 +32,8 @@
2932
HARD_CODED_TOTALS = {
3033
"health_insurance_premiums_without_medicare_part_b": 385e9,
3134
"other_medical_expenses": 278e9,
32-
"medicare_part_b_premiums": get_beneficiary_paid_medicare_part_b_premiums_target(
33-
2024
35+
MEDICARE_PART_B_PREMIUM_VARIABLE: (
36+
get_beneficiary_paid_medicare_part_b_premiums_target(2024)
3437
),
3538
"over_the_counter_health_expenses": 72e9,
3639
"spm_unit_spm_threshold": 3_945e9,
@@ -851,18 +854,21 @@ def build_loss_matrix(dataset: type, time_period):
851854
else:
852855
in_age_range = (age >= age_lower_bound) * (age < age_lower_bound + 10)
853856
label_suffix = f"age_{age_lower_bound}_to_{age_lower_bound + 9}"
854-
for expense_type in [
855-
"health_insurance_premiums_without_medicare_part_b",
856-
"over_the_counter_health_expenses",
857-
"other_medical_expenses",
858-
"medicare_part_b_premiums",
857+
for expense_type, target_column in [
858+
(
859+
"health_insurance_premiums_without_medicare_part_b",
860+
"health_insurance_premiums_without_medicare_part_b",
861+
),
862+
("over_the_counter_health_expenses", "over_the_counter_health_expenses"),
863+
("other_medical_expenses", "other_medical_expenses"),
864+
(MEDICARE_PART_B_PREMIUM_VARIABLE, "medicare_part_b_premiums"),
859865
]:
860866
label = f"nation/census/{expense_type}/{label_suffix}"
861867
value = sim.calculate(expense_type).values
862868
loss_matrix[label] = sim.map_result(
863869
in_age_range * value, "person", "household"
864870
)
865-
targets_array.append(row[expense_type])
871+
targets_array.append(row[target_column])
866872

867873
# AGI by SPM threshold totals
868874

policyengine_us_data/utils/policyengine.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,3 @@ def has_policyengine_us_variables(*variables: str) -> bool:
134134
return False
135135

136136
return set(variables).issubset(available_variables)
137-
138-
139-
def supports_medicare_enrollment_input() -> bool:
140-
return has_policyengine_us_variables("medicare_enrolled")
141-
142-
143-
def supports_modeled_medicare_part_b_inputs() -> bool:
144-
return has_policyengine_us_variables(
145-
"medicare_part_b_premiums_reported",
146-
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ classifiers = [
2222
"Programming Language :: Python :: 3.14",
2323
]
2424
dependencies = [
25-
"policyengine-us>=1.637.0",
25+
"policyengine-us>=1.674.1",
2626
"policyengine-core>=3.23.6",
2727
"pandas>=2.3.1",
2828
"requests>=2.25.0",

0 commit comments

Comments
 (0)