Skip to content

Commit 6b2b51e

Browse files
committed
Add JCT tax expenditure source package
1 parent 2305ea5 commit 6b2b51e

6 files changed

Lines changed: 304 additions & 121 deletions

File tree

arch/source_package.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,11 @@
8686
"census-acs-s2201-congressional-district-snap-2024": Path(
8787
"census/acs_s2201_district_2024"
8888
),
89-
"census-b01001-female-age-2023": Path(
90-
"census/b01001_female_15_44_2023"
91-
),
89+
"census-b01001-female-age-2023": Path("census/b01001_female_15_44_2023"),
9290
"census-pep-2024-national-age-sex": Path("census/pep_2024_national_age_sex"),
9391
"census-pep-2024-state-age-sex": Path("census/pep_2024_state_age_sex"),
94-
"census-population-projections-2023": Path(
95-
"census/population_projections_2023"
96-
),
97-
"census-stc-individual-income-tax": Path(
98-
"census/stc_individual_income_tax"
99-
),
92+
"census-population-projections-2023": Path("census/population_projections_2023"),
93+
"census-stc-individual-income-tax": Path("census/stc_individual_income_tax"),
10094
"cms-medicaid-chip-monthly-enrollment-december-2024": Path(
10195
"cms_medicaid/chip_monthly_enrollment_december_2024"
10296
),
@@ -106,9 +100,7 @@
106100
"cms-aca-oep-state-level": Path("cms_aca/oep_state_level"),
107101
"cms-aca-oep-state-level-2022": Path("cms_aca/oep_state_level_2022"),
108102
"cms-aca-oep-state-level-2025": Path("cms_aca/oep_state_level_2025"),
109-
"cms-aca-effectuated-enrollment-2022": Path(
110-
"cms_aca/effectuated_enrollment_2022"
111-
),
103+
"cms-aca-effectuated-enrollment-2022": Path("cms_aca/effectuated_enrollment_2022"),
112104
"cms-medicare-trustees-report-2025-part-b-premium-income": Path(
113105
"cms_medicare/medicare_trustees_report_2025"
114106
),
@@ -122,6 +114,7 @@
122114
"hhs-acf-liheap-fy2024-national-profile": Path(
123115
"hhs_acf_liheap/fy2024_national_profile"
124116
),
117+
"jct-tax-expenditures-2024": Path("jct/tax_expenditures_2024"),
125118
"soi-table-1-1": Path("irs_soi/table_1_1"),
126119
"soi-table-1-2": Path("irs_soi/table_1_2"),
127120
"soi-table-1-4": Path("irs_soi/table_1_4"),
@@ -132,9 +125,7 @@
132125
),
133126
"soi-table-4-3": Path("irs_soi/table_4_3"),
134127
"soi-state-2022": Path("irs_soi/state_2022"),
135-
"soi-congressional-district-2022": Path(
136-
"irs_soi/congressional_district_2022"
137-
),
128+
"soi-congressional-district-2022": Path("irs_soi/congressional_district_2022"),
138129
"soi-historic-table-2": Path("irs_soi/historic_table_2"),
139130
"soi-historic-table-2-state-agi-2022": Path(
140131
"irs_soi/historic_table_2_state_agi_2022"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
tax_expenditure,label,jct_line_label,individual_revenue_loss_2024,definition_note
2+
salt_deduction,State and local tax deduction,Taxes paid deduction,21700000000,Individual income tax revenue loss for itemized state and local tax deduction.
3+
medical_expense_deduction,Medical expense deduction,Medical expenses deduction,11400000000,Individual income tax revenue loss for itemized medical expense deduction.
4+
charitable_deduction,Charitable contribution deduction,"Charitable contributions, other than education and health; charitable contributions, education; charitable contributions, health",60400000000,Sum of individual income tax revenue-loss rows for charitable contributions excluding corporate income tax estimates.
5+
deductible_mortgage_interest,Deductible mortgage interest,Deductibility of mortgage interest on owner-occupied homes,24800000000,Individual income tax revenue loss for deductible mortgage interest on owner-occupied homes.
6+
qualified_business_income_deduction,Qualified business income deduction,20-percent deduction for qualified business income,63100000000,Individual income tax revenue loss for qualified business income deduction.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
id: jct-tax-expenditures-2024
2+
title: JCT Estimates of Federal Tax Expenditures for Fiscal Years 2024-2028
3+
publisher: Joint Committee on Taxation
4+
description: >
5+
Selected calendar-year 2024 individual income tax revenue-loss estimates from
6+
JCX-48-24 Table 1, curated for Populace fiscal calibration targets that must be
7+
encoded as simple reforms rather than static PolicyEngine variables.
8+
source_page: https://www.jct.gov/publications/2024/jcx-48-24/
9+
source_url: https://www.jct.gov/getattachment/765709fb-9a4b-430a-8f9e-4d342ec97f7e/x-48-24.pdf
10+
retrieved_at: "2026-06-16T00:00:00Z"
11+
license: public-domain
12+
files:
13+
2024:
14+
filename: jct_tax_expenditures_2024.csv
15+
source_url: https://www.jct.gov/getattachment/765709fb-9a4b-430a-8f9e-4d342ec97f7e/x-48-24.pdf
16+
sha256: a6db137830f744ef140c796e23da2673480a70fb3fbcba848476d5ccbf27ffa8
17+
size_bytes: 1154
18+
source_table: Estimates of Federal Tax Expenditures for Fiscal Years 2024-2028
19+
year: 2024
20+
storage:
21+
r2:
22+
provider: r2
23+
bucket: arch-raw
24+
key: raw/jct/jct-tax-expenditures-2024/2024/a6db137830f744ef140c796e23da2673480a70fb3fbcba848476d5ccbf27ffa8/jct_tax_expenditures_2024.csv
25+
uri: r2://arch-raw/raw/jct/jct-tax-expenditures-2024/2024/a6db137830f744ef140c796e23da2673480a70fb3fbcba848476d5ccbf27ffa8/jct_tax_expenditures_2024.csv
26+
notes:
27+
- >
28+
Calendar-year 2024 values use the individual income tax revenue-loss column
29+
only. For charitable contributions, the value sums the three individual
30+
charitable rows and excludes corporate revenue-loss columns.
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
schema_version: arch.source_package.v1
2+
package_id: jct-tax-expenditures-2024
3+
label: JCT 2024 individual income tax expenditure estimates
4+
artifact:
5+
source_name: jct
6+
source_table: Estimates of Federal Tax Expenditures for Fiscal Years 2024-2028
7+
resource_package: db
8+
resource_directory: data/jct/tax_expenditures_2024
9+
manifest: manifest.yaml
10+
vintage: jcx_48_24
11+
extracted_at: "2026-06-16"
12+
extraction_method: manual extraction from JCX-48-24 Table 1 individual income tax revenue-loss columns
13+
parser: delimited_text_full_rows
14+
sheet_name: jct_tax_expenditures_2024
15+
record_sets:
16+
- record_set_id: jct.tax_expenditures.cy2024
17+
record_set_spec_id: jct.individual_tax_expenditure_revenue_loss.v1
18+
source_record_id_prefix: jct.tax_expenditures.cy2024
19+
sheet_name: jct_tax_expenditures_2024
20+
period_type: calendar_year
21+
period: 2024
22+
geography_id: 0100000US
23+
geography_level: country
24+
geography_name: United States
25+
geography_vintage: current
26+
entity: tax_unit
27+
entity_role: federal_income_tax_unit
28+
domain: federal_income_tax
29+
groupby_dimension: jct.tax_expenditure
30+
rows:
31+
- value_id: salt_deduction
32+
label: State and local tax deduction
33+
ordinal: 0
34+
row_number: 2
35+
expected_row_header_column: A
36+
expected_row_header: salt_deduction
37+
filters:
38+
tax_expenditure: salt_deduction
39+
constraints:
40+
- variable: tax_expenditure
41+
operator: "=="
42+
value: salt_deduction
43+
label: JCT tax expenditure row
44+
table_record_kind: total
45+
- value_id: medical_expense_deduction
46+
label: Medical expense deduction
47+
ordinal: 1
48+
row_number: 3
49+
expected_row_header_column: A
50+
expected_row_header: medical_expense_deduction
51+
filters:
52+
tax_expenditure: medical_expense_deduction
53+
constraints:
54+
- variable: tax_expenditure
55+
operator: "=="
56+
value: medical_expense_deduction
57+
label: JCT tax expenditure row
58+
table_record_kind: total
59+
- value_id: charitable_deduction
60+
label: Charitable contribution deduction
61+
ordinal: 2
62+
row_number: 4
63+
expected_row_header_column: A
64+
expected_row_header: charitable_deduction
65+
filters:
66+
tax_expenditure: charitable_deduction
67+
constraints:
68+
- variable: tax_expenditure
69+
operator: "=="
70+
value: charitable_deduction
71+
label: JCT tax expenditure row
72+
table_record_kind: total
73+
- value_id: deductible_mortgage_interest
74+
label: Deductible mortgage interest
75+
ordinal: 3
76+
row_number: 5
77+
expected_row_header_column: A
78+
expected_row_header: deductible_mortgage_interest
79+
filters:
80+
tax_expenditure: deductible_mortgage_interest
81+
constraints:
82+
- variable: tax_expenditure
83+
operator: "=="
84+
value: deductible_mortgage_interest
85+
label: JCT tax expenditure row
86+
table_record_kind: total
87+
- value_id: qualified_business_income_deduction
88+
label: Qualified business income deduction
89+
ordinal: 4
90+
row_number: 6
91+
expected_row_header_column: A
92+
expected_row_header: qualified_business_income_deduction
93+
filters:
94+
tax_expenditure: qualified_business_income_deduction
95+
constraints:
96+
- variable: tax_expenditure
97+
operator: "=="
98+
value: qualified_business_income_deduction
99+
label: JCT tax expenditure row
100+
table_record_kind: total
101+
measures:
102+
- measure_id: revenue_loss
103+
label: Individual income tax revenue loss
104+
ordinal: 0
105+
column: D
106+
source_column_id: individual_revenue_loss_2024
107+
expected_column_header_row: 1
108+
expected_column_header: individual_revenue_loss_2024
109+
concept: jct.individual_tax_expenditure_revenue_loss
110+
source_concept: jct.individual_income_tax_revenue_loss
111+
concept_relation: source_label
112+
unit: usd
113+
aggregation: sum
114+
expected_cell_type: number

tests/test_arch_bundle.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@ def test_build_bundle_writes_merged_consumer_contract(tmp_path):
1616
output_dir = tmp_path / "bundle"
1717

1818
report = build_bundle(output_dir, year=2023)
19-
summary = json.loads(
20-
(output_dir / "reports" / "build_bundle.json").read_text()
21-
)
19+
summary = json.loads((output_dir / "reports" / "build_bundle.json").read_text())
2220
rows = _load_jsonl(output_dir / "consumer_facts.jsonl")
2321
source_packages = json.loads((output_dir / "source_packages.json").read_text())
2422
coverage = json.loads((output_dir / "coverage.json").read_text())
@@ -144,11 +142,7 @@ def test_build_bundle_writes_merged_consumer_contract(tmp_path):
144142
]
145143
assert (output_dir / "sources" / "soi-table-1-1" / "consumer_facts.jsonl").exists()
146144
assert (
147-
output_dir
148-
/ "sources"
149-
/ "soi-table-1-4"
150-
/ "reports"
151-
/ "build_summary.json"
145+
output_dir / "sources" / "soi-table-1-4" / "reports" / "build_summary.json"
152146
).exists()
153147
assert (
154148
output_dir
@@ -163,7 +157,10 @@ def test_build_bundle_writes_merged_consumer_contract(tmp_path):
163157
/ "consumer_facts.jsonl"
164158
).exists()
165159
assert (
166-
output_dir / "sources" / "census-stc-individual-income-tax" / "consumer_facts.jsonl"
160+
output_dir
161+
/ "sources"
162+
/ "census-stc-individual-income-tax"
163+
/ "consumer_facts.jsonl"
167164
).exists()
168165
assert (
169166
output_dir
@@ -294,10 +291,7 @@ def test_build_bundle_cli_supports_ssa_supplement_source(tmp_path, capsys):
294291
assert payload["counts"]["fact_count"] == 6
295292
assert payload["coverage"]["counts"]["by_source"] == {"ssa": 6}
296293
assert payload["coverage"]["counts"]["by_entity"] == {"person": 6}
297-
assert {
298-
row["universe_constraints"]["constraints"][0]["value"]
299-
for row in rows
300-
} == {
294+
assert {row["universe_constraints"]["constraints"][0]["value"] for row in rows} == {
301295
"social_security_benefits",
302296
"social_security_retirement_benefits",
303297
"social_security_survivors_benefits",
@@ -307,6 +301,38 @@ def test_build_bundle_cli_supports_ssa_supplement_source(tmp_path, capsys):
307301
}
308302

309303

304+
def test_build_bundle_cli_supports_jct_tax_expenditure_source(tmp_path, capsys):
305+
output_dir = tmp_path / "bundle"
306+
307+
exit_code = harness_main(
308+
[
309+
"build-bundle",
310+
"--year",
311+
"2024",
312+
"--source",
313+
"jct-tax-expenditures-2024",
314+
"--out",
315+
str(output_dir),
316+
]
317+
)
318+
payload = json.loads(capsys.readouterr().out)
319+
rows = _load_jsonl(output_dir / "consumer_facts.jsonl")
320+
321+
assert exit_code == 0
322+
assert payload["valid"]
323+
assert payload["counts"]["source_package_count"] == 1
324+
assert payload["counts"]["fact_count"] == 5
325+
assert payload["coverage"]["counts"]["by_source"] == {"jct": 5}
326+
assert payload["coverage"]["counts"]["by_entity"] == {"tax_unit": 5}
327+
assert {row["lineage"]["source_record_id"] for row in rows} == {
328+
"jct.tax_expenditures.cy2024.salt_deduction.revenue_loss",
329+
"jct.tax_expenditures.cy2024.medical_expense_deduction.revenue_loss",
330+
"jct.tax_expenditures.cy2024.charitable_deduction.revenue_loss",
331+
"jct.tax_expenditures.cy2024.deductible_mortgage_interest.revenue_loss",
332+
"jct.tax_expenditures.cy2024.qualified_business_income_deduction.revenue_loss",
333+
}
334+
335+
310336
def test_build_bundle_coverage_reports_duplicate_keys():
311337
rows = [
312338
{

0 commit comments

Comments
 (0)