fix: Add PolicyReformAnalysis.model_rebuild() and update example script

anth-volk · claude · anth-volk · commit bc5dcff8d274 · 2026-03-18T23:53:38.000+01:00
- Add model_rebuild() for PolicyReformAnalysis in both US and UK __init__.py
  to resolve BudgetSummaryItem forward reference (TYPE_CHECKING import)
- Fix test_aggregate to expect ValueError instead of StopIteration
- Fix example script bp.metric → bp.poverty_type to match Poverty class

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/examples/us_budgetary_impact.py b/examples/us_budgetary_impact.py
@@ -0,0 +1,155 @@
+"""Example: US budgetary impact comparison between baseline and reform.
+
+Demonstrates the canonical policyengine.py workflow:
+1. Ensure datasets exist (download + compute or load from cache)
+2. Define a parametric reform
+3. Create baseline and reform simulations
+4a. Run them and query single metrics with ChangeAggregate
+4b. Use economic_impact_analysis() for the full analysis
+
+Run: python examples/us_budgetary_impact.py
+"""
+
+import datetime
+
+from policyengine.core import Parameter, ParameterValue, Policy, Simulation
+from policyengine.outputs.change_aggregate import (
+    ChangeAggregate,
+    ChangeAggregateType,
+)
+from policyengine.tax_benefit_models.us import (
+    economic_impact_analysis,
+    ensure_datasets,
+    us_latest,
+)
+
+
+def main():
+    year = 2026
+
+    # ── Step 1: Get dataset (downloads from HuggingFace on first run) ──
+    print("Ensuring datasets are available...")
+    datasets = ensure_datasets(
+        datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"],
+        years=[year],
+        data_folder="./data",
+    )
+    dataset = datasets[f"enhanced_cps_2024_{year}"]
+    print(f"  Loaded: {dataset}")
+
+    # ── Step 2: Define a reform ──
+    # Example: double the standard deduction for single filers
+    param = Parameter(
+        name="gov.irs.deductions.standard.amount.SINGLE",
+        tax_benefit_model_version=us_latest,
+    )
+    reform = Policy(
+        name="Double standard deduction (single)",
+        parameter_values=[
+            ParameterValue(
+                parameter=param,
+                start_date=datetime.date(year, 1, 1),
+                end_date=datetime.date(year, 12, 31),
+                value=30_950,
+            ),
+        ],
+    )
+
+    # ── Step 3: Create simulations ──
+    baseline_sim = Simulation(
+        dataset=dataset,
+        tax_benefit_model_version=us_latest,
+    )
+    reform_sim = Simulation(
+        dataset=dataset,
+        tax_benefit_model_version=us_latest,
+        policy=reform,
+    )
+
+    # ── Step 4a: Quick budgetary number via ChangeAggregate ──
+    # This requires running the simulations first.
+    print("\nRunning simulations...")
+    baseline_sim.run()
+    reform_sim.run()
+
+    tax_change = ChangeAggregate(
+        baseline_simulation=baseline_sim,
+        reform_simulation=reform_sim,
+        variable="household_tax",
+        aggregate_type=ChangeAggregateType.SUM,
+    )
+    tax_change.run()
+    print("\nQuick budgetary result:")
+    print(f"  Tax revenue change: ${tax_change.result / 1e9:.2f}B")
+
+    # Count winners and losers
+    winners = ChangeAggregate(
+        baseline_simulation=baseline_sim,
+        reform_simulation=reform_sim,
+        variable="household_net_income",
+        aggregate_type=ChangeAggregateType.COUNT,
+        change_geq=1,
+    )
+    losers = ChangeAggregate(
+        baseline_simulation=baseline_sim,
+        reform_simulation=reform_sim,
+        variable="household_net_income",
+        aggregate_type=ChangeAggregateType.COUNT,
+        change_leq=-1,
+    )
+    winners.run()
+    losers.run()
+    print(f"  Winners: {winners.result / 1e6:.2f}M households")
+    print(f"  Losers: {losers.result / 1e6:.2f}M households")
+
+    # ── Step 4b: Full analysis via economic_impact_analysis ──
+    # Note: this calls .ensure() internally, which is a no-op here since
+    # we already ran the simulations above. If we hadn't called .run(),
+    # ensure() would run + cache them automatically.
+    print("\nRunning full economic impact analysis...")
+    analysis = economic_impact_analysis(baseline_sim, reform_sim)
+
+    print("\n=== Program-by-Program Impact ===")
+    for prog in analysis.program_statistics.outputs:
+        print(
+            f"  {prog.program_name:30s}  "
+            f"baseline=${prog.baseline_total / 1e9:8.1f}B  "
+            f"reform=${prog.reform_total / 1e9:8.1f}B  "
+            f"change=${prog.change / 1e9:+8.1f}B"
+        )
+
+    print("\n=== Decile Impacts ===")
+    for d in analysis.decile_impacts.outputs:
+        print(
+            f"  Decile {d.decile:2d}:  "
+            f"avg change=${d.absolute_change:+8.0f}  "
+            f"relative={d.relative_change:+.2%}"
+        )
+
+    print("\n=== Poverty ===")
+    for bp, rp in zip(
+        analysis.baseline_poverty.outputs,
+        analysis.reform_poverty.outputs,
+        strict=True,
+    ):
+        print(
+            f"  {bp.poverty_type:30s}  "
+            f"baseline={bp.rate:.4f}  "
+            f"reform={rp.rate:.4f}  "
+            f"change={rp.rate - bp.rate:+.4f}"
+        )
+
+    print("\n=== Inequality ===")
+    bi = analysis.baseline_inequality
+    ri = analysis.reform_inequality
+    print(f"  Gini:           baseline={bi.gini:.4f}  reform={ri.gini:.4f}")
+    print(
+        f"  Top 10% share:  baseline={bi.top_10_share:.4f}  reform={ri.top_10_share:.4f}"
+    )
+    print(
+        f"  Top 1% share:   baseline={bi.top_1_share:.4f}  reform={ri.top_1_share:.4f}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py
@@ -37,6 +37,9 @@
     PolicyEngineUKLatest.model_rebuild()
     ProgrammeStatistics.model_rebuild(_types_namespace={"Simulation": Simulation})
     BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation})
+    PolicyReformAnalysis.model_rebuild(
+        _types_namespace={"BudgetSummaryItem": BudgetSummaryItem}
+    )
 
     __all__ = [
         "UKYearData",
diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py
@@ -37,6 +37,9 @@
     PolicyEngineUSLatest.model_rebuild()
     ProgramStatistics.model_rebuild(_types_namespace={"Simulation": Simulation})
     BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation})
+    PolicyReformAnalysis.model_rebuild(
+        _types_namespace={"BudgetSummaryItem": BudgetSummaryItem}
+    )
 
     __all__ = [
         "USYearData",
diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py
@@ -478,7 +478,7 @@ def test_aggregate_invalid_variable():
             variable="nonexistent_variable",
             aggregate_type=AggregateType.SUM,
         )
-        with pytest.raises(StopIteration):
+        with pytest.raises(ValueError):
             agg.run()
 
         # Invalid filter variable name should raise error on run()
@@ -488,5 +488,5 @@ def test_aggregate_invalid_variable():
             aggregate_type=AggregateType.SUM,
             filter_variable="nonexistent_filter",
         )
-        with pytest.raises(StopIteration):
+        with pytest.raises(ValueError):
             agg.run()

Original file line number	Diff line number	Diff line change
`@@ -478,7 +478,7 @@ def test_aggregate_invalid_variable():`
`478`	`478`	`variable="nonexistent_variable",`
`479`	`479`	`aggregate_type=AggregateType.SUM,`
`480`	`480`	`)`
`481`		`- with pytest.raises(StopIteration):`
	`481`	`+ with pytest.raises(ValueError):`
`482`	`482`	`agg.run()`
`483`	`483`
`484`	`484`	`# Invalid filter variable name should raise error on run()`
`@@ -488,5 +488,5 @@ def test_aggregate_invalid_variable():`
`488`	`488`	`aggregate_type=AggregateType.SUM,`
`489`	`489`	`filter_variable="nonexistent_filter",`
`490`	`490`	`)`
`491`		`- with pytest.raises(StopIteration):`
	`491`	`+ with pytest.raises(ValueError):`
`492`	`492`	`agg.run()`