PolicyEngine
diff --git a/‎changelog.d/630.fixed.md‎
Lines changed: 1 addition & 0 deletions b/‎changelog.d/630.fixed.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎policyengine_us_data/calibration/publish_local_area.py‎
Lines changed: 31 additions & 0 deletions b/‎policyengine_us_data/calibration/publish_local_area.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎policyengine_us_data/calibration/unified_matrix_builder.py‎
Lines changed: 38 additions & 0 deletions b/‎policyengine_us_data/calibration/unified_matrix_builder.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎policyengine_us_data/datasets/cps/census_cps.py‎
Lines changed: 74 additions & 5 deletions b/‎policyengine_us_data/datasets/cps/census_cps.py‎
Lines changed: 74 additions & 5 deletions
@@ -0,0 +1 @@
+Anchor ACA take-up to subsidized Marketplace coverage reports so unsubsidized exchange enrollment does not force premium tax credit take-up.
@@ -38,6 +38,7 @@
 from policyengine_us_data.utils.takeup import (
     SIMPLE_TAKEUP_VARS,
     apply_block_takeup_to_arrays,
+    reported_subsidized_marketplace_by_tax_unit,
 )
 
 CHECKPOINT_FILE = Path("completed_states.txt")
@@ -157,6 +158,34 @@ def record_completed_city(city_name: str):
         f.write(f"{city_name}\n")
 
 
+def _build_reported_takeup_anchors(
+    data: dict, time_period: int
+) -> dict[str, np.ndarray]:
+    """Build reported-coverage arrays keyed by the take-up variable they anchor.
+
+    Args:
+        data: Mapping of variable name to a ``{period: values}`` mapping.
+        time_period: Period whose values are read.
+
+    Returns:
+        A dict holding ``takes_up_aca_if_eligible`` when subsidized
+        Marketplace reports exist for ``time_period`` (the result of
+        ``reported_subsidized_marketplace_by_tax_unit``) and
+        ``takes_up_medicaid_if_eligible`` when Medicaid coverage reports
+        exist (cast to bool). A variable with no values for
+        ``time_period`` is left out, so the result may be empty.
+    """
+    marketplace_var = (
+        "reported_has_subsidized_marketplace_health_coverage_at_interview"
+    )
+    medicaid_var = "has_medicaid_health_coverage_at_interview"
+
+    def _has_period(var_name: str) -> bool:
+        # A variable only counts when it also carries the requested period.
+        return var_name in data and time_period in data[var_name]
+
+    anchors = {}
+    if _has_period(marketplace_var):
+        person_tax_unit_ids = data["person_tax_unit_id"][time_period]
+        tax_unit_ids = data["tax_unit_id"][time_period]
+        person_reported = data[marketplace_var][time_period]
+        anchors["takes_up_aca_if_eligible"] = (
+            reported_subsidized_marketplace_by_tax_unit(
+                person_tax_unit_ids,
+                tax_unit_ids,
+                person_reported,
+            )
+        )
+    if _has_period(medicaid_var):
+        medicaid_reported = data[medicaid_var][time_period]
+        anchors["takes_up_medicaid_if_eligible"] = medicaid_reported.astype(bool)
+    return anchors
+
+
 def build_h5(
     weights: np.ndarray,
     geography,
@@ -551,6 +580,7 @@ def build_h5(
         }
         hh_state_fips = clone_geo["state_fips"].astype(np.int32)
         original_hh_ids = household_ids[active_hh].astype(np.int64)
+        reported_anchors = _build_reported_takeup_anchors(data, time_period)
 
         takeup_results = apply_block_takeup_to_arrays(
             hh_blocks=active_blocks,
@@ -561,6 +591,7 @@ def build_h5(
             entity_counts=entity_counts,
             time_period=time_period,
             takeup_filter=takeup_filter,
+            reported_anchors=reported_anchors,
         )
         for var_name, bools in takeup_results.items():
             data[var_name] = {time_period: bools}
 
@@ -14,6 +14,7 @@
 from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
+import h5py
 import numpy as np
 import pandas as pd
 from scipy import sparse
@@ -668,6 +669,7 @@ def _process_single_clone(
     entity_hh_idx_map = sd.get("entity_hh_idx_map", {})
     entity_to_person_idx = sd.get("entity_to_person_idx", {})
     precomputed_rates = sd.get("precomputed_rates", {})
+    reported_takeup_anchors = sd.get("reported_takeup_anchors", {})
 
     # Slice geography for this clone
     clone_states = geo_states[col_start:col_end]
@@ -789,6 +791,7 @@ def _process_single_clone(
                 ent_blocks,
                 ent_hh_ids,
                 ent_ci,
+                reported_mask=reported_takeup_anchors.get(takeup_var),
             )
 
             ent_values = (ent_eligible * ent_takeup).astype(np.float32)
@@ -2132,6 +2135,7 @@ def build_matrix(
             from policyengine_us_data.utils.takeup import (
                 TAKEUP_AFFECTED_TARGETS,
                 compute_block_takeup_for_entities,
+                reported_subsidized_marketplace_by_tax_unit,
             )
             from policyengine_us_data.parameters import (
                 load_take_up_rate,
@@ -2160,6 +2164,37 @@ def build_matrix(
                 "person": person_hh_indices,
             }
 
+            reported_takeup_anchors = {}
+            with h5py.File(self.dataset_path, "r") as f:
+                period_key = str(self.time_period)
+                if (
+                    "reported_has_subsidized_marketplace_health_coverage_at_interview"
+                    in f
+                    and period_key
+                    in f[
+                        "reported_has_subsidized_marketplace_health_coverage_at_interview"
+                    ]
+                ):
+                    person_marketplace = f[
+                        "reported_has_subsidized_marketplace_health_coverage_at_interview"
+                    ][period_key][...].astype(bool)
+                    person_tax_unit_ids = f["person_tax_unit_id"][period_key][...]
+                    tax_unit_ids = f["tax_unit_id"][period_key][...]
+                    reported_takeup_anchors["takes_up_aca_if_eligible"] = (
+                        reported_subsidized_marketplace_by_tax_unit(
+                            person_tax_unit_ids,
+                            tax_unit_ids,
+                            person_marketplace,
+                        )
+                    )
+                if (
+                    "has_medicaid_health_coverage_at_interview" in f
+                    and period_key in f["has_medicaid_health_coverage_at_interview"]
+                ):
+                    reported_takeup_anchors["takes_up_medicaid_if_eligible"] = f[
+                        "has_medicaid_health_coverage_at_interview"
+                    ][period_key][...].astype(bool)
+
             entity_to_person_idx = {}
             for entity_level in ("spm_unit", "tax_unit"):
                 ent_ids = sim.calculate(
@@ -2200,6 +2235,7 @@ def build_matrix(
             self.household_ids = household_ids
             self.precomputed_rates = precomputed_rates
             self.affected_target_info = affected_target_info
+            self.reported_takeup_anchors = reported_takeup_anchors
 
         # 5d. Clone loop
         from pathlib import Path
@@ -2249,6 +2285,7 @@ def build_matrix(
                 shared_data["entity_hh_idx_map"] = entity_hh_idx_map
                 shared_data["entity_to_person_idx"] = entity_to_person_idx
                 shared_data["precomputed_rates"] = precomputed_rates
+                shared_data["reported_takeup_anchors"] = reported_takeup_anchors
 
             logger.info(
                 "Starting parallel clone processing: %d clones, %d workers",
@@ -2452,6 +2489,7 @@ def build_matrix(
                             ent_blocks,
                             ent_hh_ids,
                             ent_ci,
+                            reported_mask=reported_takeup_anchors.get(takeup_var),
                         )
 
                         ent_values = (ent_eligible * ent_takeup).astype(np.float32)
 
@@ -7,6 +7,52 @@
 from policyengine_us_data.storage import STORAGE_FOLDER
 
 
+# Person-file columns that are allowed to be absent from the CPS person CSV.
+# `_resolve_person_usecols` does not raise when one of these is missing, and
+# `_fill_missing_optional_person_columns` adds any absent one filled with 0.
+OPTIONAL_PERSON_COLUMNS = {
+    "NOW_COV",
+    "NOW_DIR",
+    "NOW_MRK",
+    "NOW_MRKS",
+    "NOW_MRKUN",
+    "NOW_NONM",
+    "NOW_PRIV",
+    "NOW_PUB",
+    "NOW_GRP",
+    "NOW_CAID",
+    "NOW_MCAID",
+    "NOW_PCHIP",
+    "NOW_OTHMT",
+    "NOW_MCARE",
+    "NOW_MIL",
+    "NOW_CHAMPVA",
+    "NOW_VACARE",
+    "NOW_IHSFLG",
+}
+
+
+def _resolve_person_usecols(
+    available_columns, spm_unit_columns: list[str]
+) -> list[str]:
+    """Select the person-file columns to read, tolerating absent optional ones.
+
+    Args:
+        available_columns: Column names present in the person CSV header.
+        spm_unit_columns: SPM-unit column names requested alongside
+            ``PERSON_COLUMNS`` and ``TAX_UNIT_COLUMNS``.
+
+    Returns:
+        The requested columns that exist in ``available_columns``, in
+        request order.
+
+    Raises:
+        KeyError: If a requested column is absent and is not listed in
+            ``OPTIONAL_PERSON_COLUMNS``. The message names at most ten
+            missing columns and states how many more were left out.
+    """
+    max_listed = 10
+    requested_columns = PERSON_COLUMNS + spm_unit_columns + TAX_UNIT_COLUMNS
+    available_columns = set(available_columns)
+    # A set keeps a column that was requested twice from being reported
+    # twice and using up the limited space in the message.
+    missing_required = sorted(
+        {
+            column
+            for column in requested_columns
+            if column not in available_columns
+            and column not in OPTIONAL_PERSON_COLUMNS
+        }
+    )
+    if missing_required:
+        message = "Missing required CPS person columns: " + ", ".join(
+            missing_required[:max_listed]
+        )
+        omitted = len(missing_required) - max_listed
+        if omitted > 0:
+            # Make the truncation visible instead of hiding the rest.
+            message += f", ... ({omitted} more)"
+        raise KeyError(message)
+    return [column for column in requested_columns if column in available_columns]
+
+
+def _fill_missing_optional_person_columns(person: pd.DataFrame) -> pd.DataFrame:
+    """Add every absent optional person column, filled with zero.
+
+    Args:
+        person: Person-level frame; it is modified in place.
+
+    Returns:
+        The same ``person`` frame, now containing every column named in
+        ``OPTIONAL_PERSON_COLUMNS``.
+    """
+    # Iterate in sorted order: walking the set directly would append the
+    # missing columns in an order that depends on string-hash randomization,
+    # so the resulting column layout could differ from run to run.
+    for column in sorted(OPTIONAL_PERSON_COLUMNS):
+        if column not in person.columns:
+            person[column] = 0
+    return person
+
+
 class CensusCPS(Dataset):
     """Dataset containing CPS ASEC tables in the Census format."""
 
@@ -59,12 +105,19 @@ def generate(self):
                     file_prefix = "cpspb/asec/prod/data/2019/"
                 else:
                     file_prefix = ""
-                with zipfile.open(f"{file_prefix}pppub{file_year_code}.csv") as f:
-                    storage["person"] = pd.read_csv(
+                person_path = f"{file_prefix}pppub{file_year_code}.csv"
+                with zipfile.open(person_path) as f:
+                    person_columns = pd.read_csv(f, nrows=0).columns
+                person_usecols = _resolve_person_usecols(
+                    person_columns, spm_unit_columns
+                )
+                with zipfile.open(person_path) as f:
+                    person = pd.read_csv(
                         f,
-                        usecols=PERSON_COLUMNS + spm_unit_columns + TAX_UNIT_COLUMNS,
+                        usecols=person_usecols,
                     ).fillna(0)
-                    person = storage["person"]
+                person = _fill_missing_optional_person_columns(person)
+                storage["person"] = person
                 with zipfile.open(f"{file_prefix}ffpub{file_year_code}.csv") as f:
                     person_family_id = person.PH_SEQ * 10 + person.PF_SEQ
                     family = pd.read_csv(f).fillna(0)
@@ -236,7 +289,24 @@ class CensusCPS_2018(CensusCPS):
     "A_AGE",
     "A_SEX",
     "PEDISEYE",
+    "NOW_COV",
+    "NOW_DIR",
     "NOW_MRK",
+    "NOW_MRKS",
+    "NOW_MRKUN",
+    "NOW_NONM",
+    "NOW_PRIV",
+    "NOW_PUB",
+    "NOW_GRP",
+    "NOW_CAID",
+    "NOW_MCAID",
+    "NOW_PCHIP",
+    "NOW_OTHMT",
+    "NOW_MCARE",
+    "NOW_MIL",
+    "NOW_CHAMPVA",
+    "NOW_VACARE",
+    "NOW_IHSFLG",
     "WSAL_VAL",
     "INT_VAL",
     "SEMP_VAL",
@@ -294,7 +364,6 @@ class CensusCPS_2018(CensusCPS):
     "PMED_VAL",
     "PEMCPREM",
     "PRCITSHP",
-    "NOW_GRP",
     "POCCU2",
     "PEINUSYR",
     "MCARE",
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Anchor ACA take-up to subsidized Marketplace coverage reports so unsubsidized exchange enrollment does not force premium tax credit take-up.`