Skip to content

Commit c50bebb

Browse files
committed
Avoid overlapping state AGI top-tail targets
1 parent 6573ad2 commit c50bebb

2 files changed

Lines changed: 24 additions & 0 deletions

File tree

policyengine_us_data/db/etl_irs_soi.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,19 @@
7878
28: (10_000_000, np.inf), # row 28
7979
}
8080

81+
82+
def _skip_coarse_state_agi_person_count_target(geo_type: str, agi_stub: int) -> bool:
83+
"""Skip the coarse state 500k+ count target when fine state bins are loaded.
84+
85+
The standard geography-file SOI feed only has a top-coded state AGI stub 9
86+
(500k+). We separately load `in55cmcsv`, which splits that state tail into
87+
500k-1m and 1m+. Keeping the coarse state count target alongside the fine
88+
rows would double-constrain the same top-tail population in calibration.
89+
"""
90+
91+
return geo_type == "state" and agi_stub == 9
92+
93+
8194
# These variables map cleanly from Publication 1304 aggregate tables to the
8295
# existing national IRS-SOI domain strata. We intentionally leave `aca_ptc`
8396
# and `refundable_ctc` on the geography-file path for now because the
@@ -1244,6 +1257,9 @@ def load_soi_data(long_dfs, year, national_year: Optional[int] = None):
12441257
geo_info = parse_ucgid(ucgid_i)
12451258
person_count = agi_df.iloc[i][["target_value"]].values[0]
12461259

1260+
if _skip_coarse_state_agi_person_count_target(geo_info["type"], agi_stub):
1261+
continue
1262+
12471263
if geo_info["type"] == "state":
12481264
parent_stratum_id = filer_strata["state"][geo_info["state_fips"]]
12491265
note = f"State FIPS {geo_info['state_fips']} filers, AGI >= {agi_income_lower}, AGI < {agi_income_upper}"

tests/unit/test_etl_irs_soi_overlay.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
create_database,
1212
)
1313
from policyengine_us_data.db.etl_irs_soi import (
14+
_skip_coarse_state_agi_person_count_target,
1415
_get_or_create_national_domain_stratum,
1516
_upsert_target,
1617
load_national_workbook_soi_targets,
@@ -180,3 +181,10 @@ def fake_get_tracked_soi_row(variable, requested_year, **kwargs):
180181
assert len(count_rows) == 1
181182
assert int(count_rows.iloc[0]["period"]) == 2023
182183
assert float(count_rows.iloc[0]["value"]) == 50.0
184+
185+
186+
def test_skip_coarse_state_agi_person_count_target_only_for_state_stub_9():
    """Only the (state, stub 9) combination is flagged for skipping."""
    # (geo_type, agi_stub) -> expected skip decision
    expected_by_case = (
        (("state", 9), True),
        (("state", 8), False),
        (("district", 9), False),
        (("national", 9), False),
    )
    for (geo_type, agi_stub), expected in expected_by_case:
        outcome = _skip_coarse_state_agi_person_count_target(geo_type, agi_stub)
        assert outcome is expected

0 commit comments

Comments
 (0)