1515from microimpute .models .qrf import QRF
1616import logging
1717from policyengine_us_data .parameters import load_take_up_rate
18+ from policyengine_us_data .utils .randomness import seeded_rng
1819
1920
2021class CPS (Dataset ):
@@ -192,24 +193,25 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
192193def add_takeup (self ):
193194 data = self .load_dataset ()
194195
195- from policyengine_us import system , Microsimulation
196+ from policyengine_us import Microsimulation
196197
197198 baseline = Microsimulation (dataset = self )
198199
199- # Generate all stochastic take-up decisions using take-up rates from parameter files
200- # This keeps the country package purely deterministic
201- generator = np . random . default_rng ( seed = 100 )
200+ n_persons = len ( data [ "person_id" ])
201+ n_tax_units = len ( data [ "tax_unit_id" ])
202+ n_spm_units = len ( data [ "spm_unit_id" ] )
202203
203- # Load take-up rates from parameter files
204+ # Load take-up rates
204205 eitc_rates_by_children = load_take_up_rate ("eitc" , self .time_period )
205206 dc_ptc_rate = load_take_up_rate ("dc_ptc" , self .time_period )
206207 snap_rate = load_take_up_rate ("snap" , self .time_period )
207208 aca_rate = load_take_up_rate ("aca" , self .time_period )
208- medicaid_rate = load_take_up_rate ("medicaid" , self .time_period )
209+ medicaid_rates_by_state = load_take_up_rate ("medicaid" , self .time_period )
209210 head_start_rate = load_take_up_rate ("head_start" , self .time_period )
210211 early_head_start_rate = load_take_up_rate (
211212 "early_head_start" , self .time_period
212213 )
214+ ssi_pass_rate = load_take_up_rate ("ssi_pass_rate" , self .time_period )
213215
214216 # EITC: varies by number of children
215217 eitc_child_count = baseline .calculate ("eitc_child_count" ).values
@@ -219,38 +221,60 @@ def add_takeup(self):
219221 for c in eitc_child_count
220222 ]
221223 )
222- data ["takes_up_eitc" ] = (
223- generator .random (len (data ["tax_unit_id" ])) < eitc_takeup_rate
224- )
224+ rng = seeded_rng ("takes_up_eitc" )
225+ data ["takes_up_eitc" ] = rng .random (n_tax_units ) < eitc_takeup_rate
225226
226227 # DC Property Tax Credit
227- data ["takes_up_dc_ptc" ] = (
228- generator .random (len (data ["tax_unit_id" ])) < dc_ptc_rate
229- )
228+ rng = seeded_rng ("takes_up_dc_ptc" )
229+ data ["takes_up_dc_ptc" ] = rng .random (n_tax_units ) < dc_ptc_rate
230230
231231 # SNAP
232- data ["takes_up_snap_if_eligible" ] = (
233- generator .random (len (data ["spm_unit_id" ])) < snap_rate
234- )
232+ rng = seeded_rng ("takes_up_snap_if_eligible" )
233+ data ["takes_up_snap_if_eligible" ] = rng .random (n_spm_units ) < snap_rate
235234
236235 # ACA
237- data ["takes_up_aca_if_eligible" ] = (
238- generator .random (len (data ["tax_unit_id" ])) < aca_rate
239- )
236+ rng = seeded_rng ("takes_up_aca_if_eligible" )
237+ data ["takes_up_aca_if_eligible" ] = rng .random (n_tax_units ) < aca_rate
240238
241- # Medicaid
239+ # Medicaid: state-specific rates
240+ state_codes = baseline .calculate ("state_code_str" ).values
241+ hh_ids = data ["household_id" ]
242+ person_hh_ids = data ["person_household_id" ]
243+ hh_to_state = dict (zip (hh_ids , state_codes ))
244+ person_states = np .array (
245+ [hh_to_state .get (hh_id , "CA" ) for hh_id in person_hh_ids ]
246+ )
247+ medicaid_rate_by_person = np .array (
248+ [medicaid_rates_by_state .get (s , 0.93 ) for s in person_states ]
249+ )
250+ rng = seeded_rng ("takes_up_medicaid_if_eligible" )
242251 data ["takes_up_medicaid_if_eligible" ] = (
243- generator .random (len ( data [ "person_id" ])) < medicaid_rate
252+ rng .random (n_persons ) < medicaid_rate_by_person
244253 )
245254
246255 # Head Start
256+ rng = seeded_rng ("takes_up_head_start_if_eligible" )
247257 data ["takes_up_head_start_if_eligible" ] = (
248- generator .random (len ( data [ "person_id" ]) ) < head_start_rate
258+ rng .random (n_persons ) < head_start_rate
249259 )
250260
251261 # Early Head Start
262+ rng = seeded_rng ("takes_up_early_head_start_if_eligible" )
252263 data ["takes_up_early_head_start_if_eligible" ] = (
253- generator .random (len (data ["person_id" ])) < early_head_start_rate
264+ rng .random (n_persons ) < early_head_start_rate
265+ )
266+
267+ # SSI resource test
268+ rng = seeded_rng ("meets_ssi_resource_test" )
269+ data ["meets_ssi_resource_test" ] = rng .random (n_persons ) < ssi_pass_rate
270+
271+ # WIC draws (country package compares against category-specific rates)
272+ rng = seeded_rng ("wic_takeup_draw" )
273+ data ["wic_takeup_draw" ] = rng .random (n_persons ).astype (np .float32 )
274+
275+ rng = seeded_rng ("wic_nutritional_risk_draw" )
276+ data ["wic_nutritional_risk_draw" ] = rng .random (n_persons ).astype (
277+ np .float32
254278 )
255279
256280 self .save_dataset (data )
0 commit comments