Derive other health insurance premiums (#844)

daphnehanse11 · MaxGhenis · web-flow · commit c9be05c891c4 · 2026-04-29T17:03:27.000-04:00
* Residualize modeled health premiums

* Store residual health insurance premiums

* Rename other health insurance premium input

* Gate Part B target name on installed model

* Keep legacy Part B premium input for current model

* Emit decomposed premium input for current builds

* Require clean MOOP policyengine-us version

* Pin policyengine-us release for MOOP decomposition

---------

Co-authored-by: Max Ghenis <mghenis@gmail.com>
diff --git a/changelog.d/8089.fixed.md b/changelog.d/8089.fixed.md
@@ -0,0 +1 @@
+Added other health insurance premiums as the non-Medicare premium category not covered by modeled Marketplace, CHIP, or Medicaid premiums.
diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml
@@ -122,7 +122,7 @@ include:
     geo_level: national
   - variable: medicaid
     geo_level: national
-  - variable: medicare_part_b_premiums
+  - variable: medicare_part_b_premium
     geo_level: national
   - variable: other_medical_expenses
     geo_level: national
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
@@ -50,11 +50,6 @@
 from policyengine_us_data.utils.asset_imputation import (
     build_household_vehicle_receiver,
 )
-from policyengine_us_data.utils.policyengine import (
-    supports_medicare_enrollment_input,
-    supports_modeled_medicare_part_b_inputs,
-)
-
 
 CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = {
     "reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR",
@@ -193,6 +188,8 @@ def generate(self):
         add_takeup(self)
         logging.info("Imputing Marketplace plan benchmark ratio")
         add_marketplace_plan_benchmark_ratio(self)
+        logging.info("Deriving other health insurance premiums")
+        derive_other_health_insurance_premiums(self)
         logging.info("Downsampling")
 
         # Downsample
@@ -519,6 +516,124 @@ def add_marketplace_plan_benchmark_ratio(self):
     self.save_dataset(data)
 
 
+OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS = {
+    "other_health_insurance_premiums": {
+        "reported_variable": "health_insurance_premiums_without_medicare_part_b",
+        "modeled_variables": (
+            "chip_premium",
+            "marketplace_net_premium",
+            "medicaid_premium",
+        ),
+    },
+}
+
+
+def derive_other_health_insurance_premiums(self):
+    """Create other premium inputs net of baseline computed premiums.
+
+    The model adds computed premiums back explicitly, so it needs a separate
+    other-premium input for the parts of CPS-reported non-Medicare premiums
+    not explained by baseline computed Marketplace, CHIP, or Medicaid
+    premiums. The original CPS-reported premium inputs remain unchanged as raw
+    source fields. The data package requires a policyengine-us release with
+    these modeled premium variables, so missing variables fail fast instead of
+    silently producing an incomplete decomposition.
+    """
+    from policyengine_us import Microsimulation
+
+    data = self.load_dataset()
+    baseline = Microsimulation(dataset=self)
+    tbs = baseline.tax_benefit_system
+    period = self.time_period
+    changed = False
+
+    for output_variable, config in OTHER_HEALTH_INSURANCE_PREMIUM_TARGETS.items():
+        reported_variable = config["reported_variable"]
+        premium_variables = config["modeled_variables"]
+
+        if reported_variable not in data:
+            continue
+
+        computed_premium = np.zeros(len(data[reported_variable]), dtype=float)
+        for variable in premium_variables:
+            values = np.asarray(
+                baseline.calculate(variable, period=period).values,
+                dtype=float,
+            )
+            computed_premium += _premium_values_to_person(
+                data=data,
+                source_entity=tbs.variables[variable].entity.key,
+                values=values,
+            )
+
+        data[output_variable] = compute_other_health_insurance_premiums(
+            reported_premium=data[reported_variable],
+            baseline_computed_premium=computed_premium,
+        )
+        logging.info(
+            "Created %s from %s by subtracting baseline computed premiums: %s",
+            output_variable,
+            reported_variable,
+            ", ".join(premium_variables),
+        )
+        changed = True
+
+    if changed:
+        self.save_dataset(data)
+
+
+def compute_other_health_insurance_premiums(
+    reported_premium: np.ndarray,
+    baseline_computed_premium: np.ndarray,
+) -> np.ndarray:
+    """Return other premiums after subtracting baseline computed premiums."""
+    return np.asarray(reported_premium, dtype=float) - np.asarray(
+        baseline_computed_premium, dtype=float
+    )
+
+
+def _premium_values_to_person(
+    data: dict,
+    source_entity: str,
+    values: np.ndarray,
+) -> np.ndarray:
+    """Map computed premiums to person rows for person-level premium accounting."""
+    person_ids = data["person_id"]
+    if source_entity == "person":
+        if len(values) != len(person_ids):
+            raise ValueError(
+                "Person-level computed premium length does not match person rows: "
+                f"got {len(values)}, expected {len(person_ids)}."
+            )
+        return values
+
+    entity_id_variable = f"{source_entity}_id"
+    person_entity_id_variable = f"person_{source_entity}_id"
+    if entity_id_variable not in data or person_entity_id_variable not in data:
+        raise ValueError(
+            f"Cannot allocate {source_entity}-level premiums to people: missing "
+            f"{entity_id_variable} or {person_entity_id_variable}."
+        )
+
+    entity_ids = data[entity_id_variable]
+    person_entity_ids = data[person_entity_id_variable]
+    if len(values) != len(entity_ids):
+        raise ValueError(
+            f"{source_entity}-level computed premium length does not match "
+            f"{source_entity} rows: got {len(values)}, expected {len(entity_ids)}."
+        )
+
+    entity_position = {entity_id: index for index, entity_id in enumerate(entity_ids)}
+    allocated = np.zeros(len(person_ids), dtype=float)
+    seen_entities = set()
+    for person_index, entity_id in enumerate(person_entity_ids):
+        if entity_id in seen_entities:
+            continue
+        allocated[person_index] = values[entity_position[entity_id]]
+        seen_entities.add(entity_id)
+    return allocated
+
+
 MARKETPLACE_PLAN_BENCHMARK_RATIO_MIN = 0.5
 MARKETPLACE_PLAN_BENCHMARK_RATIO_MAX = 1.5
 
@@ -1009,12 +1124,7 @@ def add_personal_income_variables(cps: h5py.File, person: DataFrame, year: int):
     cps["health_insurance_premiums_without_medicare_part_b"] = person.PHIP_VAL
     cps["over_the_counter_health_expenses"] = person.POTC_VAL
     cps["other_medical_expenses"] = person.PMED_VAL
-    if supports_medicare_enrollment_input():
-        cps["medicare_enrolled"] = person.MCARE == 1
-    if supports_modeled_medicare_part_b_inputs():
-        cps["medicare_part_b_premiums_reported"] = person.PEMCPREM
-    else:
-        cps["medicare_part_b_premiums"] = person.PEMCPREM
+    cps["medicare_enrolled"] = person.MCARE == 1
 
     # Get QBI simulation parameters ---
     yamlfilename = (
diff --git a/policyengine_us_data/datasets/cps/extended_cps.py b/policyengine_us_data/datasets/cps/extended_cps.py
@@ -19,9 +19,6 @@
     impute_tax_unit_mortgage_balance_hints,
 )
 from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
-from policyengine_us_data.utils.policyengine import (
-    supports_modeled_medicare_part_b_inputs,
-)
 from policyengine_us_data.utils.retirement_limits import (
     get_retirement_limits,
     get_se_pension_limits,
@@ -151,6 +148,7 @@ def _supports_structural_mortgage_inputs() -> bool:
     "spm_unit_pre_subsidy_childcare_expenses",
     # Medical expenses
     "health_insurance_premiums_without_medicare_part_b",
+    "other_health_insurance_premiums",
     "over_the_counter_health_expenses",
     "other_medical_expenses",
     "child_support_expense",
@@ -166,9 +164,6 @@ def _supports_structural_mortgage_inputs() -> bool:
     "self_employment_income_last_year",
 ]
 
-if not supports_modeled_medicare_part_b_inputs():
-    CPS_ONLY_IMPUTED_VARIABLES.append("medicare_part_b_premiums")
-
 # Set for O(1) lookup in the splice loop.
 _CPS_ONLY_SET = set(CPS_ONLY_IMPUTED_VARIABLES)
 
diff --git a/policyengine_us_data/datasets/puf/puf.py b/policyengine_us_data/datasets/puf/puf.py
@@ -17,7 +17,9 @@
     STRUCTURAL_MORTGAGE_VARIABLES,
     convert_mortgage_interest_to_structural_inputs,
 )
-from policyengine_us_data.utils.policyengine import has_policyengine_us_variables
+from policyengine_us_data.utils.policyengine import (
+    has_policyengine_us_variables,
+)
 from policyengine_us_data.utils.uprating import (
     create_policyengine_uprating_factors_table,
 )
@@ -984,7 +986,7 @@ class PUF_2024(PUF):
     "health_insurance_premiums_without_medicare_part_b": 0.453,
     "other_medical_expenses": 0.325,
     "over_the_counter_health_expenses": 0.085,
-    "medicare_part_b_premiums": 0.137,
+    "medicare_part_b_premium": 0.137,
 }
 
 if __name__ == "__main__":
diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py
@@ -155,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
             "year": 2024,
         },
         {
-            "variable": "medicare_part_b_premiums",
+            "variable": "medicare_part_b_premium",
             "value": get_beneficiary_paid_medicare_part_b_premiums_target(2024),
             "source": get_beneficiary_paid_medicare_part_b_premiums_source(2024),
             "notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024),
diff --git a/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py b/policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py
@@ -9,7 +9,7 @@
 HARD_CODED_TOTALS = {
     "health_insurance_premiums_without_medicare_part_b": 385e9,
     "other_medical_expenses": 278e9,
-    "medicare_part_b_premiums": 112e9,
+    "medicare_part_b_premium": 112e9,
     "over_the_counter_health_expenses": 72e9,
     "spm_unit_spm_threshold": 3_945e9,
     "child_support_expense": 33e9,
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
@@ -19,6 +19,9 @@
 from policyengine_core.reforms import Reform
 from policyengine_us_data.utils.soi import pe_to_soi, get_soi
 
+
+MEDICARE_PART_B_PREMIUM_VARIABLE = "medicare_part_b_premium"
+
 # National calibration targets consumed by build_loss_matrix().
 # These values are specific to 2024 — they should NOT be applied to
 # other years without re-sourcing.  They are duplicated in
@@ -29,8 +32,8 @@
 HARD_CODED_TOTALS = {
     "health_insurance_premiums_without_medicare_part_b": 385e9,
     "other_medical_expenses": 278e9,
-    "medicare_part_b_premiums": get_beneficiary_paid_medicare_part_b_premiums_target(
-        2024
+    MEDICARE_PART_B_PREMIUM_VARIABLE: (
+        get_beneficiary_paid_medicare_part_b_premiums_target(2024)
     ),
     "over_the_counter_health_expenses": 72e9,
     "spm_unit_spm_threshold": 3_945e9,
@@ -851,18 +854,21 @@ def build_loss_matrix(dataset: type, time_period):
         else:
             in_age_range = (age >= age_lower_bound) * (age < age_lower_bound + 10)
             label_suffix = f"age_{age_lower_bound}_to_{age_lower_bound + 9}"
-        for expense_type in [
-            "health_insurance_premiums_without_medicare_part_b",
-            "over_the_counter_health_expenses",
-            "other_medical_expenses",
-            "medicare_part_b_premiums",
+        for expense_type, target_column in [
+            (
+                "health_insurance_premiums_without_medicare_part_b",
+                "health_insurance_premiums_without_medicare_part_b",
+            ),
+            ("over_the_counter_health_expenses", "over_the_counter_health_expenses"),
+            ("other_medical_expenses", "other_medical_expenses"),
+            (MEDICARE_PART_B_PREMIUM_VARIABLE, "medicare_part_b_premiums"),
         ]:
             label = f"nation/census/{expense_type}/{label_suffix}"
             value = sim.calculate(expense_type).values
             loss_matrix[label] = sim.map_result(
                 in_age_range * value, "person", "household"
             )
-            targets_array.append(row[expense_type])
+            targets_array.append(row[target_column])
 
     # AGI by SPM threshold totals
 
diff --git a/policyengine_us_data/utils/policyengine.py b/policyengine_us_data/utils/policyengine.py
@@ -134,13 +134,3 @@ def has_policyengine_us_variables(*variables: str) -> bool:
         return False
 
     return set(variables).issubset(available_variables)
-
-
-def supports_medicare_enrollment_input() -> bool:
-    return has_policyengine_us_variables("medicare_enrolled")
-
-
-def supports_modeled_medicare_part_b_inputs() -> bool:
-    return has_policyengine_us_variables(
-        "medicare_part_b_premiums_reported",
-    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.14",
 ]
 dependencies = [
-    "policyengine-us>=1.637.0",
+    "policyengine-us>=1.674.1",
     "policyengine-core>=3.23.6",
     "pandas>=2.3.1",
     "requests>=2.25.0",
diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py
@@ -206,6 +206,21 @@ def test_training_config_includes_national_ctc_agi_targets(self):
             "domain_variable": "adjusted_gross_income,non_refundable_ctc",
         } in include_rules
 
+    def test_training_config_includes_medicare_part_b_target(self):
+        config = load_target_config(
+            str(
+                Path(__file__).resolve().parents[3]
+                / "policyengine_us_data"
+                / "calibration"
+                / "target_config.yaml"
+            )
+        )
+
+        assert {
+            "variable": "medicare_part_b_premium",
+            "geo_level": "national",
+        } in config["include"]
+
     def test_training_config_includes_district_non_refundable_ctc_target(self):
         config = load_target_config(
             str(
diff --git a/tests/unit/datasets/test_other_health_insurance_premiums.py b/tests/unit/datasets/test_other_health_insurance_premiums.py
diff --git a/tests/unit/test_medical_expense_inputs.py b/tests/unit/test_medical_expense_inputs.py
diff --git a/tests/unit/test_medicare_part_b_inputs.py b/tests/unit/test_medicare_part_b_inputs.py
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Added other health insurance premiums as the non-Medicare premium category not covered by modeled Marketplace, CHIP, or Medicaid premiums.`
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,9 @@`
`17`	`17`	`STRUCTURAL_MORTGAGE_VARIABLES,`
`18`	`18`	`convert_mortgage_interest_to_structural_inputs,`
`19`	`19`	`)`
`20`		`-from policyengine_us_data.utils.policyengine import has_policyengine_us_variables`
	`20`	`+from policyengine_us_data.utils.policyengine import (`
	`21`	`+ has_policyengine_us_variables,`
	`22`	`+)`
`21`	`23`	`from policyengine_us_data.utils.uprating import (`
`22`	`24`	`create_policyengine_uprating_factors_table,`
`23`	`25`	`)`
`@@ -984,7 +986,7 @@ class PUF_2024(PUF):`
`984`	`986`	`"health_insurance_premiums_without_medicare_part_b": 0.453,`
`985`	`987`	`"other_medical_expenses": 0.325,`
`986`	`988`	`"over_the_counter_health_expenses": 0.085,`
`987`		`- "medicare_part_b_premiums": 0.137,`
	`989`	`+ "medicare_part_b_premium": 0.137,`
`988`	`990`	`}`
`989`	`991`
`990`	`992`	`if __name__ == "__main__":`
Original file line number	Diff line number	Diff line change
`@@ -155,7 +155,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR):`
`155`	`155`	`"year": 2024,`
`156`	`156`	`},`
`157`	`157`	`{`
`158`		`- "variable": "medicare_part_b_premiums",`
	`158`	`+ "variable": "medicare_part_b_premium",`
`159`	`159`	`"value": get_beneficiary_paid_medicare_part_b_premiums_target(2024),`
`160`	`160`	`"source": get_beneficiary_paid_medicare_part_b_premiums_source(2024),`
`161`	`161`	`"notes": get_beneficiary_paid_medicare_part_b_premiums_notes(2024),`
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ classifiers = [`
`22`	`22`	`"Programming Language :: Python :: 3.14",`
`23`	`23`	`]`
`24`	`24`	`dependencies = [`
`25`		`- "policyengine-us>=1.637.0",`
	`25`	`+ "policyengine-us>=1.674.1",`
`26`	`26`	`"policyengine-core>=3.23.6",`
`27`	`27`	`"pandas>=2.3.1",`
`28`	`28`	`"requests>=2.25.0",`