Skip to content

Commit 09641de

Browse files
authored
Merge pull request #451 from PolicyEngine/migrate-random-to-data-upstream
Move all randomness to data package for deterministic country package
2 parents b701a73 + eafd438 commit 09641de

15 files changed

Lines changed: 537 additions & 14 deletions

changelog_entry.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Name-based seeding (seeded_rng) for order-independent reproducibility
5+
- State-specific Medicaid takeup rates (53%-99% range, 51 jurisdictions)
6+
- SSI resource test pass rate parameter (0.4)
7+
- WIC takeup and nutritional risk draw variables (float)
8+
- meets_ssi_resource_test boolean generation
9+
changed:
10+
- Replaced shared RNG (seed=100) with per-variable name-based seeding
11+
- Medicaid takeup now uses state-specific rates instead of uniform 93%

policyengine_us_data/datasets/cps/cps.py

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
)
1515
from microimpute.models.qrf import QRF
1616
import logging
17+
from policyengine_us_data.parameters import load_take_up_rate
18+
from policyengine_us_data.utils.randomness import seeded_rng
1719

1820

1921
class CPS(Dataset):
@@ -191,28 +193,101 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
191193
def add_takeup(self):
    """Attach randomly drawn take-up and eligibility flags to the dataset.

    Every flag is produced the same way: a generator obtained from
    ``seeded_rng(<variable name>)`` yields one uniform draw per record, and
    the flag is True where the draw falls below the applicable rate. Rates
    are read with ``load_take_up_rate`` for ``self.time_period``.

    Variables written (entity size used for the draws):
        * ``takes_up_eitc`` (tax units) - rate looked up by
          ``eitc_child_count`` capped at 3, falling back to 0.85.
        * ``takes_up_dc_ptc`` (tax units).
        * ``takes_up_snap_if_eligible`` (SPM units).
        * ``takes_up_aca_if_eligible`` (tax units).
        * ``takes_up_medicaid_if_eligible`` (persons) - rate looked up by
          the state of the person's household.
        * ``takes_up_head_start_if_eligible`` and
          ``takes_up_early_head_start_if_eligible`` (persons).
        * ``meets_ssi_resource_test`` (persons).
        * ``would_claim_wic`` (persons) - rate looked up by
          ``wic_category_str``; unknown categories get rate 0.
        * ``is_wic_at_nutritional_risk`` (persons) - True when
          ``receives_wic`` is set or the draw falls below the category's
          nutritional-risk rate.

    The updated data is written back with ``self.save_dataset``.
    """
    data = self.load_dataset()

    from policyengine_us import Microsimulation

    sim = Microsimulation(dataset=self)

    person_count = len(data["person_id"])
    tax_unit_count = len(data["tax_unit_id"])
    spm_unit_count = len(data["spm_unit_id"])

    period = self.time_period

    def uniform_draws(variable, size):
        # One generator per variable name, so a variable's draws do not
        # depend on which other variables were generated before it.
        # NOTE(review): assumes seeded_rng returns a numpy Generator
        # seeded from the name - confirm in utils.randomness.
        return seeded_rng(variable).random(size)

    def rates_for(table, keys, fallback):
        # Per-record rate array: table[key], or fallback when absent.
        return np.array([table.get(key, fallback) for key in keys])

    # Rates for the programs handled first; WIC rates are loaded later.
    eitc_table = load_take_up_rate("eitc", period)
    dc_ptc_share = load_take_up_rate("dc_ptc", period)
    snap_share = load_take_up_rate("snap", period)
    aca_share = load_take_up_rate("aca", period)
    medicaid_table = load_take_up_rate("medicaid", period)
    head_start_share = load_take_up_rate("head_start", period)
    early_head_start_share = load_take_up_rate("early_head_start", period)
    ssi_share = load_take_up_rate("ssi_pass_rate", period)

    # EITC: the rate depends on the number of qualifying children, with
    # counts above 3 sharing the 3-child entry.
    child_counts = sim.calculate("eitc_child_count").values
    eitc_share = rates_for(
        eitc_table,
        (min(int(count), 3) for count in child_counts),
        0.85,
    )
    data["takes_up_eitc"] = (
        uniform_draws("takes_up_eitc", tax_unit_count) < eitc_share
    )

    # DC Property Tax Credit
    data["takes_up_dc_ptc"] = (
        uniform_draws("takes_up_dc_ptc", tax_unit_count) < dc_ptc_share
    )

    # SNAP
    data["takes_up_snap_if_eligible"] = (
        uniform_draws("takes_up_snap_if_eligible", spm_unit_count)
        < snap_share
    )

    # ACA
    data["takes_up_aca_if_eligible"] = (
        uniform_draws("takes_up_aca_if_eligible", tax_unit_count)
        < aca_share
    )

    # Medicaid: map each person to the state of their household, then to
    # that state's rate.
    # NOTE(review): assumes state_code_str values line up one-to-one with
    # data["household_id"] - confirm the variable's entity.
    household_states = sim.calculate("state_code_str").values
    state_by_household = dict(zip(data["household_id"], household_states))
    person_household_ids = data["person_household_id"]
    # Persons whose household id is not in the mapping are treated as "CA".
    states_of_persons = np.array(
        [
            state_by_household.get(household_id, "CA")
            for household_id in person_household_ids
        ]
    )
    # States missing from the rate table get 0.93.
    medicaid_share = rates_for(medicaid_table, states_of_persons, 0.93)
    data["takes_up_medicaid_if_eligible"] = (
        uniform_draws("takes_up_medicaid_if_eligible", person_count)
        < medicaid_share
    )

    # Head Start
    data["takes_up_head_start_if_eligible"] = (
        uniform_draws("takes_up_head_start_if_eligible", person_count)
        < head_start_share
    )

    # Early Head Start
    data["takes_up_early_head_start_if_eligible"] = (
        uniform_draws("takes_up_early_head_start_if_eligible", person_count)
        < early_head_start_share
    )

    # SSI resource test
    data["meets_ssi_resource_test"] = (
        uniform_draws("meets_ssi_resource_test", person_count) < ssi_share
    )

    # WIC take-up: category-specific rates, resolved to booleans here.
    categories = sim.calculate("wic_category_str").values
    wic_takeup_table = load_take_up_rate("wic_takeup", period)
    wic_takeup_share = rates_for(wic_takeup_table, categories, 0)
    data["would_claim_wic"] = (
        uniform_draws("would_claim_wic", person_count) < wic_takeup_share
    )

    # WIC nutritional risk: anyone flagged by receives_wic counts as at
    # risk; everyone else is decided by the category-specific draw.
    wic_risk_table = load_take_up_rate("wic_nutritional_risk", period)
    wic_risk_share = rates_for(wic_risk_table, categories, 0)
    already_receiving = sim.calculate("receives_wic").values
    drawn_at_risk = (
        uniform_draws("is_wic_at_nutritional_risk", person_count)
        < wic_risk_share
    )
    data["is_wic_at_nutritional_risk"] = already_receiving | drawn_at_risk

    self.save_dataset(data)
218293

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
Take-up rate parameters for stochastic simulation.
3+
4+
These parameters are stored in the data package to keep the country package
5+
as a purely deterministic rules engine.
6+
"""
7+
8+
import yaml
9+
from pathlib import Path
10+
11+
PARAMETERS_DIR = Path(__file__).parent
12+
13+
14+
def load_take_up_rate(variable_name: str, year: int = 2018):
    """Load a take-up rate from the YAML parameter files.

    The file ``take_up/<variable_name>.yaml`` under ``PARAMETERS_DIR`` is
    parsed and interpreted according to the first of these keys it
    contains:

    * ``rates_by_children`` - the mapping is returned as stored; ``year``
      is not consulted.
    * ``rates_by_state`` - the mapping is returned as stored; ``year`` is
      not consulted.
    * ``rates_by_category`` - each category holds its own time series; the
      result maps every category to its latest value dated on or before
      ``year``. Categories with no such value are left out.
    * ``values`` - a single time series; its latest value dated on or
      before ``year`` is returned.

    Time-series keys are compared at year granularity, so an entry dated
    ``2020-09-01`` already applies to ``year=2020``. Keys may be date
    objects, ``"YYYY"`` / ``"YYYY-MM-DD"`` strings, or bare integers.

    Args:
        variable_name: Name of the take-up parameter file (without .yaml).
        year: Year for which to get the rate. An integer, a year-like
            string, or an object with a ``year`` attribute is accepted.

    Returns:
        A float for a plain time series, or a dict for the
        ``rates_by_children``, ``rates_by_state`` and ``rates_by_category``
        layouts.

    Raises:
        ValueError: If a plain ``values`` time series has no entry dated
            on or before ``year``.
        KeyError: If the file contains none of the recognised keys.
    """
    yaml_path = PARAMETERS_DIR / "take_up" / f"{variable_name}.yaml"

    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # EITC: rates by number of children
    if "rates_by_children" in data:
        return data["rates_by_children"]

    # Medicaid: state-specific rates
    if "rates_by_state" in data:
        return data["rates_by_state"]

    # Normalise once so a string year compares correctly against the
    # integer years extracted from the keys.
    target_year = _take_up_key_year(year)

    # WIC-style: rates by category (each category has a time series)
    if "rates_by_category" in data:
        result = {}
        for category, time_series in data["rates_by_category"].items():
            applicable_value = _latest_take_up_value(time_series, target_year)
            if applicable_value is not None:
                result[category] = applicable_value
        return result

    # Standard time-series values
    applicable_value = _latest_take_up_value(data["values"], target_year)
    if applicable_value is None:
        raise ValueError(
            f"No take-up rate found for {variable_name} in {year}"
        )

    return applicable_value


def _take_up_key_year(key) -> int:
    """Return the calendar year encoded by a time-series key."""
    # YAML turns unquoted ``2018-01-01`` keys into date objects.
    if hasattr(key, "year"):
        return key.year
    if isinstance(key, str):
        return int(key.split("-")[0])
    return int(key)


def _latest_take_up_value(time_series, year: int):
    """Return the last value dated on or before ``year``, else None."""
    # Sort on (year, text of key) rather than on the raw keys so that a
    # series mixing date objects, strings and integers still orders
    # chronologically instead of raising on comparison.
    ordered = sorted(
        (
            (_take_up_key_year(key), str(key), value)
            for key, value in time_series.items()
        ),
        key=lambda entry: entry[:2],
    )
    latest = None
    for key_year, _, value in ordered:
        if key_year > year:
            break
        latest = value
    return latest
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
description: Share of eligible people who enroll in Affordable Care Act coverage.
2+
metadata:
3+
label: ACA takeup rate
4+
unit: /1
5+
period: year
6+
reference:
7+
- title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
8+
href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
9+
values:
10+
2018-01-01: 0.672
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
description: The share of eligible individuals who claim the DC property tax credit.
2+
metadata:
3+
unit: /1
4+
label: DC property tax credit takeup rate
5+
period: year
6+
reference:
7+
- title: District of Columbia Tax Expenditure Report, 2024
8+
href: https://ora-cfo.dc.gov/sites/default/files/dc/sites/ora-cfo/publication/attachments/2024%20Tax%20Expenditure%20Report.pdf#page=234
9+
values:
10+
# 37,133 (from 2024 Tax Expenditure Report) / 131,791,388 (PolicyEngine DC PTC value estimate)
11+
2021-01-01: 0.32
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
description: Percentage of eligible infants and toddlers who enroll in Early Head Start.
2+
metadata:
3+
label: Early Head Start take-up rate
4+
unit: /1
5+
reference:
6+
- title: NIEER State(s) of Head Start and Early Head Start Report
7+
href: https://nieer.org/research-library/states-head-start-early-head-start
8+
values:
9+
2020-09-01: 0.09
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
description: The share of eligible individuals who claim the EITC (by number of children).
2+
metadata:
3+
label: EITC take-up rate by number of children
4+
reference:
5+
- title: National Taxpayer Advocate Special Report to Congress 2020 | IRS
6+
href: https://www.taxpayeradvocate.irs.gov/wp-content/uploads/2020/08/JRC20_Volume3.pdf#page=62
7+
# Maps number of children to take-up rate
8+
rates_by_children:
9+
0: 0.65
10+
1: 0.86
11+
2: 0.85
12+
3: 0.85 # Assume same as 2
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
description: Percentage of eligible children who enroll in Head Start.
2+
metadata:
3+
label: Head Start take-up rate
4+
unit: /1
5+
reference:
6+
- title: NIEER State(s) of Head Start and Early Head Start Report
7+
href: https://nieer.org/research-library/states-head-start-early-head-start
8+
values:
9+
2020-09-01: 0.40
10+
2021-09-01: 0.30
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
description: Share of eligible people who enroll in Medicaid.
2+
metadata:
3+
label: Medicaid takeup rate
4+
unit: /1
5+
period: year
6+
breakdown:
7+
- state_code
8+
reference:
9+
- title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
10+
href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/
11+
- title: State-specific rates derived from MACPAC enrollment targets vs modeled eligibility
12+
href: https://www.medicaid.gov/medicaid/program-information/medicaid-and-chip-enrollment-data/report-highlights/index.html
13+
rates_by_state:
14+
AK: 0.88
15+
AL: 0.92
16+
AR: 0.79
17+
AZ: 0.95
18+
CA: 0.78
19+
CO: 0.99
20+
CT: 0.89
21+
DC: 0.99
22+
DE: 0.86
23+
FL: 0.98
24+
GA: 0.73
25+
HI: 0.88
26+
IA: 0.84
27+
ID: 0.78
28+
IL: 0.85
29+
IN: 0.99
30+
KS: 0.92
31+
KY: 0.87
32+
LA: 0.79
33+
MA: 0.94
34+
MD: 0.95
35+
ME: 0.92
36+
MI: 0.91
37+
MN: 0.89
38+
MO: 0.89
39+
MS: 0.75
40+
MT: 0.83
41+
NC: 0.94
42+
ND: 0.91
43+
NE: 0.79
44+
NH: 0.84
45+
NJ: 0.74
46+
NM: 0.84
47+
NV: 0.93
48+
NY: 0.86
49+
OH: 0.82
50+
OK: 0.77
51+
OR: 0.92
52+
PA: 0.64
53+
RI: 0.94
54+
SC: 0.93
55+
SD: 0.88
56+
TN: 0.92
57+
TX: 0.76
58+
UT: 0.53
59+
VA: 0.82
60+
VT: 0.93
61+
WA: 0.98
62+
WI: 0.91
63+
WV: 0.83
64+
WY: 0.70
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
description: Share of those eligible for SNAP who claim it.
2+
metadata:
3+
label: SNAP takeup rate
4+
unit: /1
5+
reference:
6+
- title: USDA
7+
href: https://www.fns.usda.gov/usamap
8+
values:
9+
2018-01-01: 0.82

0 commit comments

Comments
 (0)