1+ import argparse
2+
13from sqlmodel import Session , create_engine
24import pandas as pd
35
1214 get_or_create_source ,
1315)
1416
17+ DEFAULT_DATASET = "hf://policyengine/policyengine-us-data/calibration/stratified_extended_cps.h5"
18+
1519
16- def extract_national_targets ():
20+ def extract_national_targets (dataset : str = DEFAULT_DATASET ):
1721 """
1822 Extract national calibration targets from various sources.
1923
24+ Parameters
25+ ----------
26+ dataset : str
27+ Path to the calibration dataset (local path or HuggingFace URL).
28+ The time period is derived from the dataset's default_calculation_period.
29+
2030 Returns
2131 -------
2232 dict
@@ -26,18 +36,17 @@ def extract_national_targets():
2636 - conditional_count_targets: Enrollment counts requiring constraints
2737 - cbo_targets: List of CBO projection targets
2838 - treasury_targets: List of Treasury/JCT targets
39+ - time_period: The year derived from the dataset
2940 """
30-
31- # Initialize PolicyEngine for parameter access
3241 from policyengine_us import Microsimulation
3342
34- sim = Microsimulation (
35- dataset = "hf://policyengine/policyengine-us-data/cps_2023.h5"
36- )
43+ print (f"Loading dataset: { dataset } " )
44+ sim = Microsimulation (dataset = dataset )
45+
46+ time_period = int (sim .default_calculation_period )
47+ print (f"Derived time_period from dataset: { time_period } " )
3748
38- # Direct sum targets - these are regular variables that can be summed
39- # Store with their actual source year (2024 for hardcoded values from loss.py)
40- HARDCODED_YEAR = 2024
49+ # Direct sum targets - use the time_period derived from the dataset
4150
4251 # Separate tax-related targets that need filer constraint
4352 tax_filer_targets = [
@@ -46,35 +55,35 @@ def extract_national_targets():
4655 "value" : 21.247e9 ,
4756 "source" : "Joint Committee on Taxation" ,
4857 "notes" : "SALT deduction tax expenditure" ,
49- "year" : HARDCODED_YEAR ,
58+ "year" : time_period ,
5059 },
5160 {
5261 "variable" : "medical_expense_deduction" ,
5362 "value" : 11.4e9 ,
5463 "source" : "Joint Committee on Taxation" ,
5564 "notes" : "Medical expense deduction tax expenditure" ,
56- "year" : HARDCODED_YEAR ,
65+ "year" : time_period ,
5766 },
5867 {
5968 "variable" : "charitable_deduction" ,
6069 "value" : 65.301e9 ,
6170 "source" : "Joint Committee on Taxation" ,
6271 "notes" : "Charitable deduction tax expenditure" ,
63- "year" : HARDCODED_YEAR ,
72+ "year" : time_period ,
6473 },
6574 {
6675 "variable" : "interest_deduction" ,
6776 "value" : 24.8e9 ,
6877 "source" : "Joint Committee on Taxation" ,
6978 "notes" : "Mortgage interest deduction tax expenditure" ,
70- "year" : HARDCODED_YEAR ,
79+ "year" : time_period ,
7180 },
7281 {
7382 "variable" : "qualified_business_income_deduction" ,
7483 "value" : 63.1e9 ,
7584 "source" : "Joint Committee on Taxation" ,
7685 "notes" : "QBI deduction tax expenditure" ,
77- "year" : HARDCODED_YEAR ,
86+ "year" : time_period ,
7887 },
7988 ]
8089
@@ -84,112 +93,112 @@ def extract_national_targets():
8493 "value" : 13e9 ,
8594 "source" : "Survey-reported (post-TCJA grandfathered)" ,
8695 "notes" : "Alimony received - survey reported, not tax-filer restricted" ,
87- "year" : HARDCODED_YEAR ,
96+ "year" : time_period ,
8897 },
8998 {
9099 "variable" : "alimony_expense" ,
91100 "value" : 13e9 ,
92101 "source" : "Survey-reported (post-TCJA grandfathered)" ,
93102 "notes" : "Alimony paid - survey reported, not tax-filer restricted" ,
94- "year" : HARDCODED_YEAR ,
103+ "year" : time_period ,
95104 },
96105 {
97106 "variable" : "medicaid" ,
98107 "value" : 871.7e9 ,
99108 "source" : "https://www.cms.gov/files/document/highlights.pdf" ,
100109 "notes" : "CMS 2023 highlights document - total Medicaid spending" ,
101- "year" : HARDCODED_YEAR ,
110+ "year" : time_period ,
102111 },
103112 {
104113 "variable" : "net_worth" ,
105114 "value" : 160e12 ,
106115 "source" : "Federal Reserve SCF" ,
107116 "notes" : "Total household net worth" ,
108- "year" : HARDCODED_YEAR ,
117+ "year" : time_period ,
109118 },
110119 {
111120 "variable" : "health_insurance_premiums_without_medicare_part_b" ,
112121 "value" : 385e9 ,
113122 "source" : "MEPS/NHEA" ,
114123 "notes" : "Health insurance premiums excluding Medicare Part B" ,
115- "year" : HARDCODED_YEAR ,
124+ "year" : time_period ,
116125 },
117126 {
118127 "variable" : "other_medical_expenses" ,
119128 "value" : 278e9 ,
120129 "source" : "MEPS/NHEA" ,
121130 "notes" : "Out-of-pocket medical expenses" ,
122- "year" : HARDCODED_YEAR ,
131+ "year" : time_period ,
123132 },
124133 {
125134 "variable" : "medicare_part_b_premiums" ,
126135 "value" : 112e9 ,
127136 "source" : "CMS Medicare data" ,
128137 "notes" : "Medicare Part B premium payments" ,
129- "year" : HARDCODED_YEAR ,
138+ "year" : time_period ,
130139 },
131140 {
132141 "variable" : "over_the_counter_health_expenses" ,
133142 "value" : 72e9 ,
134143 "source" : "Consumer Expenditure Survey" ,
135144 "notes" : "OTC health products and supplies" ,
136- "year" : HARDCODED_YEAR ,
145+ "year" : time_period ,
137146 },
138147 {
139148 "variable" : "child_support_expense" ,
140149 "value" : 33e9 ,
141150 "source" : "Census Bureau" ,
142151 "notes" : "Child support payments" ,
143- "year" : HARDCODED_YEAR ,
152+ "year" : time_period ,
144153 },
145154 {
146155 "variable" : "child_support_received" ,
147156 "value" : 33e9 ,
148157 "source" : "Census Bureau" ,
149158 "notes" : "Child support received" ,
150- "year" : HARDCODED_YEAR ,
159+ "year" : time_period ,
151160 },
152161 {
153162 "variable" : "spm_unit_capped_work_childcare_expenses" ,
154163 "value" : 348e9 ,
155164 "source" : "Census Bureau SPM" ,
156165 "notes" : "Work and childcare expenses for SPM" ,
157- "year" : HARDCODED_YEAR ,
166+ "year" : time_period ,
158167 },
159168 {
160169 "variable" : "spm_unit_capped_housing_subsidy" ,
161170 "value" : 35e9 ,
162171 "source" : "HUD/Census" ,
163172 "notes" : "Housing subsidies" ,
164- "year" : HARDCODED_YEAR ,
173+ "year" : time_period ,
165174 },
166175 {
167176 "variable" : "tanf" ,
168177 "value" : 9e9 ,
169178 "source" : "HHS/ACF" ,
170179 "notes" : "TANF cash assistance" ,
171- "year" : HARDCODED_YEAR ,
180+ "year" : time_period ,
172181 },
173182 {
174183 "variable" : "real_estate_taxes" ,
175184 "value" : 500e9 ,
176185 "source" : "Census Bureau" ,
177186 "notes" : "Property taxes paid" ,
178- "year" : HARDCODED_YEAR ,
187+ "year" : time_period ,
179188 },
180189 {
181190 "variable" : "rent" ,
182191 "value" : 735e9 ,
183192 "source" : "Census Bureau/BLS" ,
184193 "notes" : "Rental payments" ,
185- "year" : HARDCODED_YEAR ,
194+ "year" : time_period ,
186195 },
187196 {
188197 "variable" : "tip_income" ,
189198 "value" : 53.2e9 ,
190199 "source" : "IRS Form W-2 Box 7 statistics" ,
191200 "notes" : "Social security tips uprated 40% to account for underreporting" ,
192- "year" : HARDCODED_YEAR ,
201+ "year" : time_period ,
193202 },
194203 # SSA benefit-type totals derived from trust fund data and
195204 # SSA fact sheet type shares
@@ -198,43 +207,43 @@ def extract_national_targets():
198207 "value" : 1_060e9 ,
199208 "source" : "https://www.ssa.gov/OACT/STATS/table4a3.html" ,
200209 "notes" : "~73% of total OASDI ($1,452B CBO projection)" ,
201- "year" : HARDCODED_YEAR ,
210+ "year" : time_period ,
202211 },
203212 {
204213 "variable" : "social_security_disability" ,
205214 "value" : 148e9 ,
206215 "source" : "https://www.ssa.gov/OACT/STATS/table4a3.html" ,
207216 "notes" : "~10.2% of total OASDI (disabled workers)" ,
208- "year" : HARDCODED_YEAR ,
217+ "year" : time_period ,
209218 },
210219 {
211220 "variable" : "social_security_survivors" ,
212221 "value" : 160e9 ,
213222 "source" : "https://www.ssa.gov/OACT/FACTS/" ,
214223 "notes" : "~11.0% of total OASDI (widows, children of deceased)" ,
215- "year" : HARDCODED_YEAR ,
224+ "year" : time_period ,
216225 },
217226 {
218227 "variable" : "social_security_dependents" ,
219228 "value" : 84e9 ,
220229 "source" : "https://www.ssa.gov/OACT/FACTS/" ,
221230 "notes" : "~5.8% of total OASDI (spouses/children of retired+disabled)" ,
222- "year" : HARDCODED_YEAR ,
231+ "year" : time_period ,
223232 },
224233 # IRA contribution totals from IRS SOI accumulation tables
225234 {
226235 "variable" : "traditional_ira_contributions" ,
227236 "value" : 25e9 ,
228237 "source" : "https://www.irs.gov/statistics/soi-tax-stats-accumulation-and-distribution-of-individual-retirement-arrangements" ,
229238 "notes" : "Tax year 2022 (~5M x $4,510 avg) uprated ~12% to 2024" ,
230- "year" : HARDCODED_YEAR ,
239+ "year" : time_period ,
231240 },
232241 {
233242 "variable" : "roth_ira_contributions" ,
234243 "value" : 39e9 ,
235244 "source" : "https://www.irs.gov/statistics/soi-tax-stats-accumulation-and-distribution-of-individual-retirement-arrangements" ,
236245 "notes" : "Tax year 2022 (~10M x $3,482 avg) uprated ~12% to 2024" ,
237- "year" : HARDCODED_YEAR ,
246+ "year" : time_period ,
238247 },
239248 ]
240249
@@ -247,15 +256,15 @@ def extract_national_targets():
247256 "person_count" : 72_429_055 ,
248257 "source" : "CMS/HHS administrative data" ,
249258 "notes" : "Medicaid enrollment count" ,
250- "year" : HARDCODED_YEAR ,
259+ "year" : time_period ,
251260 },
252261 {
253262 "constraint_variable" : "aca_ptc" ,
254263 "stratum_group_id" : None , # Will use a generic stratum or create new group
255264 "person_count" : 19_743_689 ,
256265 "source" : "CMS marketplace data" ,
257266 "notes" : "ACA Premium Tax Credit recipients" ,
258- "year" : HARDCODED_YEAR ,
267+ "year" : time_period ,
259268 },
260269 ]
261270
@@ -302,8 +311,7 @@ def extract_national_targets():
302311
303312 conditional_count_targets .extend (ssn_none_targets_by_year )
304313
305- # CBO projection targets - get for a specific year
306- CBO_YEAR = 2023 # Year the CBO projections are for
314+ # CBO projection targets - use time_period derived from dataset
307315 cbo_vars = [
308316 # Note: income_tax_positive matches CBO's receipts definition
309317 # where refundable credit payments in excess of liability are
@@ -326,15 +334,15 @@ def extract_national_targets():
326334 param_name = cbo_param_name_map .get (variable_name , variable_name )
327335 try :
328336 value = sim .tax_benefit_system .parameters (
329- CBO_YEAR
337+ time_period
330338 ).calibration .gov .cbo ._children [param_name ]
331339 cbo_targets .append (
332340 {
333341 "variable" : variable_name ,
334342 "value" : float (value ),
335343 "source" : "CBO Budget Projections" ,
336344 "notes" : f"CBO projection for { variable_name } " ,
337- "year" : CBO_YEAR ,
345+ "year" : time_period ,
338346 }
339347 )
340348 except (KeyError , AttributeError ) as e :
@@ -343,19 +351,18 @@ def extract_national_targets():
343351 f"{ variable_name } (param: { param_name } ): { e } "
344352 )
345353
346- # Treasury/JCT targets (EITC) - get for a specific year
347- TREASURY_YEAR = 2023
354+ # Treasury/JCT targets (EITC) - use time_period derived from dataset
348355 try :
349356 eitc_value = sim .tax_benefit_system .parameters .calibration .gov .treasury .tax_expenditures .eitc (
350- TREASURY_YEAR
357+ time_period
351358 )
352359 treasury_targets = [
353360 {
354361 "variable" : "eitc" ,
355362 "value" : float (eitc_value ),
356363 "source" : "Treasury/JCT Tax Expenditures" ,
357364 "notes" : "EITC tax expenditure" ,
358- "year" : TREASURY_YEAR ,
365+ "year" : time_period ,
359366 }
360367 ]
361368 except (KeyError , AttributeError ) as e :
@@ -368,6 +375,7 @@ def extract_national_targets():
368375 "conditional_count_targets" : conditional_count_targets ,
369376 "cbo_targets" : cbo_targets ,
370377 "treasury_targets" : treasury_targets ,
378+ "time_period" : time_period ,
371379 }
372380
373381
@@ -707,10 +715,25 @@ def load_national_targets(
707715
708716def main ():
709717 """Main ETL pipeline for national targets."""
718+ parser = argparse .ArgumentParser (
719+ description = "ETL for national calibration targets"
720+ )
721+ parser .add_argument (
722+ "--dataset" ,
723+ default = DEFAULT_DATASET ,
724+ help = (
725+ "Source dataset (local path or HuggingFace URL). "
726+ "The time_period for targets is derived from the dataset's "
727+ "default_calculation_period. Default: %(default)s"
728+ ),
729+ )
730+ args = parser .parse_args ()
710731
711732 # Extract
712733 print ("Extracting national targets..." )
713- raw_targets = extract_national_targets ()
734+ raw_targets = extract_national_targets (dataset = args .dataset )
735+ time_period = raw_targets ["time_period" ]
736+ print (f"Using time_period={ time_period } for CBO/Treasury targets" )
714737
715738 # Transform
716739 print ("Transforming targets..." )
0 commit comments