Convert WIC float draws to bools for consistency with other takeup variables

baogorek · claude · juaristi22 · commit 7a3230689e03 · 2026-02-12T14:11:04.000+05:30
Resolves category-specific rate comparisons at data generation time so only
bools (would_claim_wic, wic_nutritional_risk_imputed) are stored in the
dataset, matching the pattern used by all other takeup variables.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
@@ -268,13 +268,24 @@ def add_takeup(self):
     rng = seeded_rng("meets_ssi_resource_test")
     data["meets_ssi_resource_test"] = rng.random(n_persons) < ssi_pass_rate
 
-    # WIC draws (country package compares against category-specific rates)
-    rng = seeded_rng("wic_takeup_draw")
-    data["wic_takeup_draw"] = rng.random(n_persons).astype(np.float32)
+    # WIC: resolve draws to bools using category-specific rates
+    wic_categories = baseline.calculate("wic_category_str").values
+    wic_takeup_rates = load_take_up_rate("wic_takeup", self.time_period)
+    wic_takeup_rate_by_person = np.array(
+        [wic_takeup_rates.get(c, 0) for c in wic_categories]
+    )
+    rng = seeded_rng("would_claim_wic")
+    data["would_claim_wic"] = rng.random(n_persons) < wic_takeup_rate_by_person
 
-    rng = seeded_rng("wic_nutritional_risk_draw")
-    data["wic_nutritional_risk_draw"] = rng.random(n_persons).astype(
-        np.float32
+    wic_risk_rates = load_take_up_rate(
+        "wic_nutritional_risk", self.time_period
+    )
+    wic_risk_rate_by_person = np.array(
+        [wic_risk_rates.get(c, 0) for c in wic_categories]
+    )
+    rng = seeded_rng("wic_nutritional_risk_imputed")
+    data["wic_nutritional_risk_imputed"] = (
+        rng.random(n_persons) < wic_risk_rate_by_person
     )
 
     self.save_dataset(data)
diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
@@ -35,6 +35,20 @@ def load_take_up_rate(variable_name: str, year: int = 2018):
     if "rates_by_state" in data:
         return data["rates_by_state"]
 
+    # WIC-style: rates by category (each category has a time series)
+    if "rates_by_category" in data:
+        result = {}
+        for category, time_series in data["rates_by_category"].items():
+            applicable_value = None
+            for y, value in sorted(time_series.items()):
+                if int(y) <= year:
+                    applicable_value = value
+                else:
+                    break
+            if applicable_value is not None:
+                result[category] = applicable_value
+        return result
+
     # Standard time-series values
     values = data["values"]
     applicable_value = None
diff --git a/policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml b/policyengine_us_data/parameters/take_up/wic_nutritional_risk.yaml
@@ -0,0 +1,13 @@
+rates_by_category:
+  PREGNANT:
+    1980: 0.913
+  POSTPARTUM:
+    1980: 0.933
+  BREASTFEEDING:
+    1980: 0.889
+  INFANT:
+    1980: 0.95
+  CHILD:
+    1980: 0.752
+  NONE:
+    1980: 0
diff --git a/policyengine_us_data/parameters/take_up/wic_takeup.yaml b/policyengine_us_data/parameters/take_up/wic_takeup.yaml
@@ -0,0 +1,33 @@
+rates_by_category:
+  PREGNANT:
+    2018: 0.533
+    2019: 0.523
+    2020: 0.456
+    2021: 0.437
+    2022: 0.456
+  POSTPARTUM:
+    2018: 0.844
+    2019: 0.847
+    2020: 0.685
+    2021: 0.672
+    2022: 0.689
+  BREASTFEEDING:
+    2018: 0.687
+    2019: 0.684
+    2020: 0.604
+    2021: 0.608
+    2022: 0.663
+  INFANT:
+    2018: 0.978
+    2019: 0.984
+    2020: 0.817
+    2021: 0.78
+    2022: 0.784
+  CHILD:
+    2018: 0.442
+    2019: 0.448
+    2020: 0.406
+    2021: 0.432
+    2022: 0.46
+  NONE:
+    2018: 0
diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -117,12 +117,31 @@ def test_boolean_generation(self):
         assert take_up.dtype == bool
         assert set(take_up).issubset({True, False})
 
-    def test_wic_draws_are_float(self):
-        rng = seeded_rng("wic_takeup_draw")
-        draws = rng.random(1000).astype(np.float32)
-        assert draws.dtype == np.float32
-        assert np.all(draws >= 0)
-        assert np.all(draws < 1)
+    def test_wic_takeup_rates_load(self):
+        # Spot-check the per-category mapping returned for 2022.
+        loaded = load_take_up_rate("wic_takeup", 2022)
+        assert isinstance(loaded, dict)
+        expected = {"PREGNANT": 0.456, "INFANT": 0.784, "NONE": 0}
+        assert {k: loaded[k] for k in expected} == expected
+
+    def test_wic_nutritional_risk_rates_load(self):
+        # Spot-check the per-category mapping returned for 2022.
+        loaded = load_take_up_rate("wic_nutritional_risk", 2022)
+        assert isinstance(loaded, dict)
+        expected = {"INFANT": 0.95, "CHILD": 0.752, "NONE": 0}
+        assert {k: loaded[k] for k in expected} == expected
+
+    def test_wic_category_specific_proportions(self):
+        """Draws below each loaded 2022 rate occur at about that rate."""
+        rates = load_take_up_rate("wic_takeup", 2022)
+        rng = seeded_rng("would_claim_wic")
+        draws = rng.random(10_000)
+        for category, expected_rate in [
+            ("INFANT", 0.784),
+            ("CHILD", 0.46),
+        ]:
+            take_up = draws < rates[category]
+            assert abs(take_up.mean() - expected_rate) < 0.05
 
     def test_state_specific_medicaid_proportions(self):
         rates = load_take_up_rate("medicaid", 2022)