|
| 1 | +"""Downstream tax-benefit aggregate validation (paper reviewer response B2). |
| 2 | +
|
| 3 | +Input-target validation (see ``soi.py``, ``baseline.py``) asks whether |
| 4 | +the calibrated synthetic frame's marginal sums match administrative |
| 5 | +totals on the *variables the calibrator was told to target*. |
| 6 | +Downstream validation asks the different, stricter question: when the |
| 7 | +calibrated frame is ingested by ``policyengine_us.Microsimulation``, |
| 8 | +do the *computed policy outputs* — federal income tax, EITC, CTC, |
| 9 | +SNAP, SSI, ACA PTC — match administrative aggregates? |
| 10 | +
|
| 11 | +This module contains: |
| 12 | +
|
| 13 | +- ``DownstreamBenchmark`` record (name, computed, benchmark, unit, source). |
| 14 | +- ``DOWNSTREAM_BENCHMARKS_2024`` canonical 2024 benchmark set. Each |
| 15 | + record is sourced to an IRS / USDA / SSA / CMS / CBO publication. |
| 16 | +- ``compute_downstream_aggregates(dataset_path, period)`` runs the |
| 17 | + simulation and returns a dict of variable → weighted sum. |
| 18 | +- ``compute_downstream_comparison(aggregates, benchmarks)`` joins |
| 19 | + computed values to benchmarks and returns per-variable errors. |
| 20 | +
|
| 21 | +Benchmark numbers are rounded publicly-reported totals; each has a |
| 22 | +citation. Updates should be traceable to the cited source. |
| 23 | +""" |
| 24 | + |
| 25 | +from __future__ import annotations |
| 26 | + |
| 27 | +from dataclasses import asdict, dataclass, field |
| 28 | +from pathlib import Path |
| 29 | +from typing import Iterable |
| 30 | + |
| 31 | + |
@dataclass(frozen=True)
class DownstreamBenchmark:
    """A computed aggregate paired with the published total it is checked against.

    ``computed`` holds the value obtained from the calibrated synthetic
    frame; ``benchmark`` holds the externally published aggregate.
    ``unit`` and ``source`` carry the benchmark's unit label and its
    citation text. Instances are immutable.
    """

    name: str  # variable identifier used to join computed values to specs
    computed: float  # aggregate computed on the synthetic frame
    benchmark: float  # published external aggregate
    unit: str  # unit label shared by ``computed`` and ``benchmark``
    source: str  # free-text citation for ``benchmark``

    @property
    def abs_error(self) -> float:
        """Return the signed difference ``computed - benchmark``.

        Despite the name this is not a magnitude: it is negative when
        the computed value falls short of the benchmark.
        """
        difference = self.computed - self.benchmark
        return difference

    @property
    def rel_error(self) -> float | None:
        """Return the signed error as a fraction of ``benchmark``.

        Yields ``None`` when ``benchmark`` equals zero, since the ratio
        is undefined there.
        """
        reference = self.benchmark
        if reference != 0:
            return (self.computed - reference) / reference
        return None

    def to_dict(self) -> dict[str, object]:
        """Return the five stored fields followed by both derived errors.

        Key order is ``name``, ``computed``, ``benchmark``, ``unit``,
        ``source``, ``abs_error``, ``rel_error``.
        """
        stored = ("name", "computed", "benchmark", "unit", "source")
        derived = ("abs_error", "rel_error")
        return {key: getattr(self, key) for key in stored + derived}
| 67 | + |
| 68 | + |
@dataclass(frozen=True)
class DownstreamBenchmarkSpec:
    """Published benchmark definition, before any computed value is attached.

    Holds only the externally sourced side of a comparison: the variable
    name, the published total, its unit label and its citation text.
    Instances are immutable.
    """

    name: str  # variable identifier the benchmark applies to
    benchmark: float  # published external aggregate
    unit: str  # unit label for ``benchmark``
    source: str  # free-text citation for ``benchmark``
| 77 | + |
| 78 | + |
# Benchmark set for the 2024 comparison: one record per policy output,
# each a rounded published total together with its citation text.
# NOTE(review): the income-tax, EITC and CTC citations below reference
# 2022/2023 publications — confirm they are the intended proxies for 2024.
DOWNSTREAM_BENCHMARKS_2024: tuple[DownstreamBenchmarkSpec, ...] = (
    # Federal individual income tax: $2.4 trillion.
    DownstreamBenchmarkSpec(
        name="income_tax",
        benchmark=2.4e12,
        unit="USD",
        source=(
            "IRS SOI 2022 total federal individual income tax liability"
            " ~$2.22T; CBO 2024 projection ~$2.4T"
        ),
    ),
    # Earned Income Tax Credit: $64 billion.
    DownstreamBenchmarkSpec(
        name="eitc",
        benchmark=64e9,
        unit="USD",
        source="IRS SOI 2023 EITC disbursed ~$64B (Table 2.5)",
    ),
    # Child Tax Credit: $115 billion.
    DownstreamBenchmarkSpec(
        name="ctc",
        benchmark=115e9,
        unit="USD",
        source=(
            "IRS SOI 2023 CTC disbursed ~$115B "
            "(pre-OBBBA CTC of $2,000 per qualifying child)"
        ),
    ),
    # SNAP benefits: $100 billion.
    DownstreamBenchmarkSpec(
        name="snap",
        benchmark=100e9,
        unit="USD",
        source="USDA FNS FY2024 SNAP benefits total ~$100B",
    ),
    # SSI payments: $66 billion.
    DownstreamBenchmarkSpec(
        name="ssi",
        benchmark=66e9,
        unit="USD",
        source="SSA SSI Annual Statistical Report 2024 ~$66B total payments",
    ),
    # ACA premium tax credit: $60 billion.
    DownstreamBenchmarkSpec(
        name="aca_ptc",
        benchmark=60e9,
        unit="USD",
        source=(
            "CMS/IRS ACA Advance Premium Tax Credit & reconciled PTC 2024 "
            "~$60B (IRA-enhanced subsidies in effect)"
        ),
    ),
)
| 126 | + |
| 127 | + |
def compute_downstream_comparison(
    aggregates: dict[str, float],
    benchmarks: Iterable[DownstreamBenchmarkSpec],
) -> dict[str, DownstreamBenchmark]:
    """Pair each computed aggregate with the benchmark of the same name.

    Args:
        aggregates: Mapping of variable name to computed aggregate.
        benchmarks: Benchmark definitions to match against. If two
            definitions share a name, the later one is used.

    Returns:
        A dict keyed by variable name, in the iteration order of
        ``aggregates``, holding one ``DownstreamBenchmark`` per matched
        variable. The computed value is coerced with ``float``.

    Only names present on both sides appear in the result. An aggregate
    with no benchmark is dropped without warning (it may simply be an
    extra diagnostic value), and a benchmark with no aggregate is
    likewise absent.
    """
    specs = {entry.name: entry for entry in benchmarks}
    return {
        variable: DownstreamBenchmark(
            name=variable,
            computed=float(value),
            benchmark=specs[variable].benchmark,
            unit=specs[variable].unit,
            source=specs[variable].source,
        )
        for variable, value in aggregates.items()
        if variable in specs
    }
| 152 | + |
| 153 | + |
def compute_downstream_aggregates(
    dataset_path: str | Path,
    period: int = 2024,
    variables: Iterable[str] = (
        "income_tax",
        "eitc",
        "ctc",
        "snap",
        "ssi",
        "aca_ptc",
    ),
) -> dict[str, float]:
    """Run a PolicyEngine-US microsimulation and total each requested variable.

    Args:
        dataset_path: Location of the dataset; passed to
            ``Microsimulation`` as a string.
        period: Period handed to ``calculate`` for every variable.
        variables: Names of the variables to aggregate.

    Returns:
        A dict of variable name → ``float`` total, in the order the
        variables were supplied. Each total is ``.sum()`` of the series
        returned by ``Microsimulation.calculate``, which is intended to
        be the weighted sum.

    Raises:
        ImportError: If ``policyengine_us`` is not installed.
    """
    # Deferred import: keeps the benchmark records and the comparison
    # function importable where PolicyEngine is not available.
    from policyengine_us import Microsimulation  # noqa: PLC0415

    sim = Microsimulation(dataset=str(dataset_path))
    return {
        name: float(sim.calculate(name, period).sum())
        for name in variables
    }
0 commit comments