From cfd19e7ef7858dbd306c5ab80b906b2166bc5f91 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 28 Mar 2026 09:18:24 -0400 Subject: [PATCH 1/3] Assign distinct reform IDs to tax expenditure targets --- ...stinct-tax-expenditure-reform-ids.fixed.md | 1 + .../db/etl_national_targets.py | 19 ++++++++++------ .../tests/test_database_build.py | 22 +++++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 changelog.d/distinct-tax-expenditure-reform-ids.fixed.md diff --git a/changelog.d/distinct-tax-expenditure-reform-ids.fixed.md b/changelog.d/distinct-tax-expenditure-reform-ids.fixed.md new file mode 100644 index 000000000..827fe64a2 --- /dev/null +++ b/changelog.d/distinct-tax-expenditure-reform-ids.fixed.md @@ -0,0 +1 @@ +Assign distinct `reform_id` values to each national JCT tax expenditure target instead of reusing a single generic reform id for all of them. diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 520397adc..a619265d8 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -14,8 +14,6 @@ etl_argparser, ) -TAX_EXPENDITURE_REFORM_ID = 1 - def extract_national_targets(dataset: str = DEFAULT_DATASET): """ @@ -64,8 +62,9 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): # These JCT values are tax expenditures, not baseline deduction totals. # They must be matched against repeal-based income tax deltas in the # unified calibration path. - tax_expenditure_targets = [ + raw_tax_expenditure_targets = [ { + "reform_id": 1, "variable": "salt_deduction", "value": 21.247e9, "source": "Joint Committee on Taxation", @@ -73,6 +72,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): "year": HARDCODED_YEAR, }, { + "reform_id": 2, "variable": "medical_expense_deduction", "value": 11.4e9, "source": "Joint Committee on Taxation", @@ -80,6 +80,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): "year": HARDCODED_YEAR, }, { + "reform_id": 3, "variable": "charitable_deduction", "value": 65.301e9, "source": "Joint Committee on Taxation", @@ -87,6 +88,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): "year": HARDCODED_YEAR, }, { + "reform_id": 4, "variable": "deductible_mortgage_interest", "value": 24.8e9, "source": "Joint Committee on Taxation", @@ -94,6 +96,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): "year": HARDCODED_YEAR, }, { + "reform_id": 5, "variable": "qualified_business_income_deduction", "value": 63.1e9, "source": "Joint Committee on Taxation", @@ -101,6 +104,7 @@ def extract_national_targets(dataset: str = DEFAULT_DATASET): "year": HARDCODED_YEAR, }, ] + tax_expenditure_targets = [{**target} for target in raw_tax_expenditure_targets] direct_sum_targets = [ { @@ -493,7 +497,7 @@ def load_national_targets( with Session(engine) as session: # Get the national stratum us_stratum = ( - session.query(Stratum).filter(Stratum.parent_stratum_id == None).first() + session.query(Stratum).filter(Stratum.parent_stratum_id.is_(None)).first() ) if not us_stratum: @@ -631,6 +635,7 @@ def load_national_targets( for _, target_data in tax_expenditure_df.iterrows(): target_year = target_data["year"] + target_reform_id = int(target_data["reform_id"]) # Clean up incorrectly scoped baseline rows from older DBs. if migrated_stratum_ids: @@ -641,7 +646,7 @@ def load_national_targets( Target.variable == target_data["variable"], Target.period == target_year, Target.reform_id == 0, - Target.active == True, + Target.active, ) .all() ) @@ -654,7 +659,7 @@ def load_national_targets( Target.stratum_id == us_stratum.stratum_id, Target.variable == target_data["variable"], Target.period == target_year, - Target.reform_id == TAX_EXPENDITURE_REFORM_ID, + Target.reform_id == target_reform_id, ) .first() ) @@ -679,7 +684,7 @@ def load_national_targets( stratum_id=us_stratum.stratum_id, variable=target_data["variable"], period=target_year, - reform_id=TAX_EXPENDITURE_REFORM_ID, + reform_id=target_reform_id, value=target_data["value"], active=True, source="PolicyEngine", diff --git a/policyengine_us_data/tests/test_database_build.py b/policyengine_us_data/tests/test_database_build.py index d151fbd60..8dd35caa2 100644 --- a/policyengine_us_data/tests/test_database_build.py +++ b/policyengine_us_data/tests/test_database_build.py @@ -144,6 +144,28 @@ def test_jct_mortgage_tax_expenditure_uses_mortgage_specific_variable(built_db): ] +def test_jct_tax_expenditure_targets_have_distinct_reform_ids(built_db): + """Each JCT tax expenditure target should have its own reform id.""" + conn = sqlite3.connect(str(built_db)) + rows = conn.execute(""" + SELECT t.variable, t.reform_id + FROM targets t + WHERE t.notes LIKE '%Modeled as repeal-based income tax expenditure target%' + AND t.notes LIKE '%Source: Joint Committee on Taxation%' + ORDER BY t.variable + """).fetchall() + conn.close() + + expected = [ + ("charitable_deduction", 3), + ("deductible_mortgage_interest", 4), + ("medical_expense_deduction", 2), + ("qualified_business_income_deduction", 5), + ("salt_deduction", 1), + ] + + assert rows == expected + def test_state_income_tax_targets(built_db): """State income tax targets should cover all income-tax states.""" conn = sqlite3.connect(str(built_db)) From 597489a91889033b4b0be20a122c96631a081af3 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 28 Mar 2026 18:57:51 -0400 Subject: [PATCH 2/3] Fix reform-id calibration test coverage --- policyengine_us_data/db/create_database_tables.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py index 1e279979f..8ab860ed2 100644 --- a/policyengine_us_data/db/create_database_tables.py +++ b/policyengine_us_data/db/create_database_tables.py @@ -522,7 +522,6 @@ def create_or_replace_views(engine) -> None: conn.execute(text(TARGET_OVERVIEW_VIEW)) conn.commit() - def refresh_views_for_db_path(db_path: str | Path) -> None: """Refresh SQL views for an existing SQLite database file.""" engine = create_engine(f"sqlite:///{Path(db_path)}") @@ -530,7 +529,5 @@ def refresh_views_for_db_path(db_path: str | Path) -> None: create_or_replace_views(engine) finally: engine.dispose() - - if __name__ == "__main__": engine = create_database() From 34109c15eb16c20a46aea5273bf5f00faeabe0a7 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 29 Mar 2026 12:01:51 -0400 Subject: [PATCH 3/3] Format files for lint --- policyengine_us_data/db/create_database_tables.py | 3 +++ policyengine_us_data/tests/test_database_build.py | 1 + 2 files changed, 4 insertions(+) diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py index 8ab860ed2..1e279979f 100644 --- a/policyengine_us_data/db/create_database_tables.py +++ b/policyengine_us_data/db/create_database_tables.py @@ -522,6 +522,7 @@ def create_or_replace_views(engine) -> None: conn.execute(text(TARGET_OVERVIEW_VIEW)) conn.commit() + def refresh_views_for_db_path(db_path: str | Path) -> None: """Refresh SQL views for an existing SQLite database file.""" engine = create_engine(f"sqlite:///{Path(db_path)}") @@ -529,5 +530,7 @@ def refresh_views_for_db_path(db_path: str | Path) -> None: create_or_replace_views(engine) finally: engine.dispose() + + if __name__ == "__main__": engine = create_database() diff --git a/policyengine_us_data/tests/test_database_build.py b/policyengine_us_data/tests/test_database_build.py index 8dd35caa2..c9d3c72ed 100644 --- a/policyengine_us_data/tests/test_database_build.py +++ b/policyengine_us_data/tests/test_database_build.py @@ -166,6 +166,7 @@ def test_jct_tax_expenditure_targets_have_distinct_reform_ids(built_db): assert rows == expected + def test_state_income_tax_targets(built_db): """State income tax targets should cover all income-tax states.""" conn = sqlite3.connect(str(built_db))