Skip to content

Commit 0e1175e

Browse files
committed
Replace manual negative income targets with SOI controls
1 parent 7c76aad commit 0e1175e

7 files changed

Lines changed: 628 additions & 19 deletions

File tree

paper/sections/methodology/loss_matrix.tex

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,12 @@ \subsubsection{CPS-Derived Statistics}
8989
\item Rent: \$735B
9090
\end{itemize}
9191

92-
\subsubsection{Market Income Targets}
92+
\subsubsection{Negative AGI and Loss Component Targets}
9393

94-
From IRS SOI PUF estimates:
94+
From IRS SOI Publication 1304 tables:
9595
\begin{itemize}
96-
\item Total negative household market income: -\$138B
97-
\item Count of households with negative market income: 3M
96+
\item All-return negative AGI amount and return count
97+
\item Taxable-return AGI-bin targets for positive-valued business, capital gains, estate, partnership/S-corp, and rent/royalty loss components
9898
\end{itemize}
9999

100100
\subsubsection{Healthcare Spending by Age}
@@ -150,4 +150,4 @@ \subsubsection{Target Validation}
150150
\item Consistent uprating factors applied across related targets
151151
\end{itemize}
152152

153-
The resulting 7,000+ targets provide comprehensive coverage of income distributions, program participation, demographic patterns, and tax expenditure utilization, ensuring the enhanced dataset accurately reflects the complexity of the US tax and benefit system. The majority of targets come from IRS Statistics of Income data (over 5,300 targets), supplemented by state-level demographic and program participation data (over 1,700 targets).
153+
The resulting 7,000+ targets provide comprehensive coverage of income distributions, program participation, demographic patterns, and tax expenditure utilization, ensuring the enhanced dataset accurately reflects the complexity of the US tax and benefit system. The majority of targets come from IRS Statistics of Income data (over 5,300 targets), supplemented by state-level demographic and program participation data (over 1,700 targets).

policyengine_us_data/calibration/target_config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,38 @@ include:
313313
- variable: tax_unit_count
314314
geo_level: national
315315
domain_variable: adjusted_gross_income,taxable_interest_income
316+
# SOI AGI-binned loss-component targets. These replace rough manually
317+
# curated negative-income controls with source-backed component constraints.
318+
- variable: total_self_employment_income
319+
geo_level: national
320+
domain_variable: adjusted_gross_income,income_tax_before_credits,total_self_employment_income
321+
- variable: tax_unit_count
322+
geo_level: national
323+
domain_variable: adjusted_gross_income,income_tax_before_credits,total_self_employment_income
324+
- variable: loss_limited_net_capital_gains
325+
geo_level: national
326+
domain_variable: adjusted_gross_income,income_tax_before_credits,loss_limited_net_capital_gains
327+
- variable: tax_unit_count
328+
geo_level: national
329+
domain_variable: adjusted_gross_income,income_tax_before_credits,loss_limited_net_capital_gains
330+
- variable: estate_income
331+
geo_level: national
332+
domain_variable: adjusted_gross_income,income_tax_before_credits,estate_income
333+
- variable: tax_unit_count
334+
geo_level: national
335+
domain_variable: adjusted_gross_income,income_tax_before_credits,estate_income
336+
- variable: tax_unit_partnership_s_corp_income
337+
geo_level: national
338+
domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_partnership_s_corp_income
339+
- variable: tax_unit_count
340+
geo_level: national
341+
domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_partnership_s_corp_income
342+
- variable: tax_unit_rental_income
343+
geo_level: national
344+
domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_rental_income
345+
- variable: tax_unit_count
346+
geo_level: national
347+
domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_rental_income
316348
- variable: tax_exempt_interest_income
317349
geo_level: national
318350
domain_variable: tax_exempt_interest_income

policyengine_us_data/db/etl_irs_soi.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,19 @@ def _skip_coarse_state_agi_person_count_target(geo_type: str, agi_stub: int) ->
204204
"adjusted_gross_income": "adjusted_gross_income",
205205
"count": "tax_unit_count",
206206
}
207+
# The all-return negative-AGI targets use the same SOI-variable -> target-variable
# mapping as the taxable-AGI targets; keep an independent shallow copy so the two
# mappings can diverge later without affecting each other.
SOI_NEGATIVE_AGI_TARGET_VARIABLES = {**SOI_TAXABLE_AGI_TARGET_VARIABLES}
207208
SOI_TAXABLE_AGI_DOMAIN_TARGET_VARIABLES = {
208209
"employment_income": "irs_employment_income",
209210
"total_pension_income": "pension_income",
210211
"total_social_security": "social_security",
211212
}
213+
# Maps each SOI loss-component variable name to the target variable that
# load_national_taxable_loss_agi_targets constrains to be negative (the
# "<variable> < 0" domain) and targets by AGI band.
SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES = {
    "business_net_losses": "total_self_employment_income",
    "capital_gains_losses": "loss_limited_net_capital_gains",
    "estate_losses": "estate_income",
    "partnership_and_s_corp_losses": "tax_unit_partnership_s_corp_income",
    "rent_and_royalty_net_losses": "tax_unit_rental_income",
}
212220
SOI_FILING_STATUS_CONSTRAINTS = {
213221
"Single": ("==", "SINGLE"),
214222
"Head of Household": ("==", "HEAD_OF_HOUSEHOLD"),
@@ -694,6 +702,110 @@ def _get_or_create_national_agi_domain_stratum(
694702
return stratum
695703

696704

705+
def _get_or_create_national_agi_stratum(
    session: Session,
    national_filer_stratum_id: int,
    *,
    agi_lower_bound: float,
    agi_upper_bound: float,
) -> Stratum:
    """Return the national filer stratum for one AGI band, creating it if absent.

    An existing stratum is recognised by its parent stratum id together with
    the generated ``notes`` text, so the note format doubles as the lookup key.

    Args:
        session: Database session used for the lookup and for the insert.
        national_filer_stratum_id: Id of the parent (national filer) stratum.
        agi_lower_bound: Inclusive lower AGI bound of the band.
        agi_upper_bound: Exclusive upper AGI bound of the band.

    Returns:
        The matching stratum, either found or newly added and flushed.
    """
    note = f"National filers, AGI >= {agi_lower_bound}, AGI < {agi_upper_bound}"

    lookup = select(Stratum).where(
        Stratum.parent_stratum_id == national_filer_stratum_id,
        Stratum.notes == note,
    )
    existing = session.exec(lookup).first()
    if existing:
        return existing

    # (constraint_variable, operation, value) triples defining the stratum.
    constraint_specs = (
        ("tax_unit_is_filer", "==", "1"),
        ("adjusted_gross_income", ">=", str(agi_lower_bound)),
        ("adjusted_gross_income", "<", str(agi_upper_bound)),
    )

    created = Stratum(
        parent_stratum_id=national_filer_stratum_id,
        notes=note,
    )
    created.constraints_rel.extend(
        [
            StratumConstraint(
                constraint_variable=variable,
                operation=operation,
                value=value,
            )
            for variable, operation, value in constraint_specs
        ]
    )
    session.add(created)
    # Flush so the new row is written before the caller reads its stratum_id.
    session.flush()
    return created
748+
749+
750+
def _get_or_create_national_taxable_agi_negative_domain_stratum(
    session: Session,
    national_filer_stratum_id: int,
    *,
    domain_variable: str,
    agi_lower_bound: float,
    agi_upper_bound: float,
) -> Stratum:
    """Return the taxable-filer AGI-band stratum where ``domain_variable`` < 0.

    The stratum is restricted to filers with positive
    ``income_tax_before_credits``, AGI inside the half-open band
    ``[agi_lower_bound, agi_upper_bound)``, and a negative ``domain_variable``.
    An existing stratum is recognised by its parent stratum id together with
    the generated ``notes`` text.

    Args:
        session: Database session used for the lookup and for the insert.
        national_filer_stratum_id: Id of the parent (national filer) stratum.
        domain_variable: Variable that must be negative within the stratum.
        agi_lower_bound: Inclusive lower AGI bound of the band.
        agi_upper_bound: Exclusive upper AGI bound of the band.

    Returns:
        The matching stratum, either found or newly added and flushed.
    """
    note = (
        f"National taxable filers, AGI >= {agi_lower_bound}, "
        f"AGI < {agi_upper_bound}, {domain_variable} < 0"
    )

    lookup = select(Stratum).where(
        Stratum.parent_stratum_id == national_filer_stratum_id,
        Stratum.notes == note,
    )
    existing = session.exec(lookup).first()
    if existing:
        return existing

    # (constraint_variable, operation, value) triples defining the stratum.
    constraint_specs = (
        ("tax_unit_is_filer", "==", "1"),
        ("income_tax_before_credits", ">", "0"),
        ("adjusted_gross_income", ">=", str(agi_lower_bound)),
        ("adjusted_gross_income", "<", str(agi_upper_bound)),
        (domain_variable, "<", "0"),
    )

    created = Stratum(
        parent_stratum_id=national_filer_stratum_id,
        notes=note,
    )
    created.constraints_rel.extend(
        [
            StratumConstraint(
                constraint_variable=variable,
                operation=operation,
                value=value,
            )
            for variable, operation, value in constraint_specs
        ]
    )
    session.add(created)
    # Flush so the new row is written before the caller reads its stratum_id.
    session.flush()
    return created
807+
808+
697809
def _get_or_create_national_eitc_agi_child_stratum(
698810
session: Session,
699811
national_filer_stratum_id: int,
@@ -1122,6 +1234,86 @@ def load_national_taxable_agi_domain_filing_status_targets(
11221234
)
11231235

11241236

1237+
def load_national_negative_agi_targets(
    session: Session,
    national_filer_stratum_id: int,
    target_year: int,
) -> None:
    """Upsert the all-return negative-AGI amount and count targets.

    Selects the SOI rows for the ``(-inf, 0)`` AGI band across all filing
    statuses that are not restricted to taxable returns, and upserts one
    target per row on the matching national AGI stratum.

    Args:
        session: Database session used for stratum lookup and target upserts.
        national_filer_stratum_id: Id of the parent (national filer) stratum.
        target_year: Year passed to ``get_soi`` and stored as the target period.
    """
    soi = get_soi(target_year)
    is_all_return_negative_agi = (
        soi["Variable"].isin(SOI_NEGATIVE_AGI_TARGET_VARIABLES)
        & (soi["Filing status"] == "All")
        & (soi["AGI lower bound"] == -np.inf)
        & (soi["AGI upper bound"] == 0)
        & (~soi["Taxable only"])
    )
    selected = soi[is_all_return_negative_agi].copy()

    for _, record in selected.iterrows():
        target_variable = SOI_NEGATIVE_AGI_TARGET_VARIABLES[record["Variable"]]
        band_stratum = _get_or_create_national_agi_stratum(
            session,
            national_filer_stratum_id,
            agi_lower_bound=float(record["AGI lower bound"]),
            agi_upper_bound=float(record["AGI upper bound"]),
        )

        # Provenance recorded alongside the target value.
        soi_table = record["SOI table"]
        source_year = int(record["Year"])
        xlsx_row = int(record["XLSX row"])
        provenance = (
            f"Publication 1304 {soi_table} all-return negative-AGI "
            f"target (source year {source_year}, row {xlsx_row})"
        )

        _upsert_target(
            session,
            stratum_id=band_stratum.stratum_id,
            variable=target_variable,
            period=int(target_year),
            value=float(record["Value"]),
            source="IRS SOI",
            notes=provenance,
        )
1274+
1275+
1276+
def load_national_taxable_loss_agi_targets(
    session: Session,
    national_filer_stratum_id: int,
    target_year: int,
) -> None:
    """Upsert taxable-return loss-component targets for each AGI band.

    Only strictly positive SOI values are used. Rows flagged as counts become
    ``tax_unit_count`` targets with the value unchanged; all other rows become
    amount targets on the mapped loss variable with the sign flipped, so the
    stored amount is negative.

    Args:
        session: Database session used for stratum lookup and target upserts.
        national_filer_stratum_id: Id of the parent (national filer) stratum.
        target_year: Year passed to ``get_soi`` and stored as the target period.
    """
    soi = get_soi(target_year)
    is_taxable_loss_band = (
        soi["Variable"].isin(SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES)
        & (soi["Filing status"] == "All")
        & (soi["Taxable only"])
        & (~soi["Full population"])
        & (soi["Value"] > 0)
    )
    selected = soi[is_taxable_loss_band].copy()

    for _, record in selected.iterrows():
        source_variable = record["Variable"]
        loss_variable = SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES[source_variable]
        band_stratum = _get_or_create_national_taxable_agi_negative_domain_stratum(
            session,
            national_filer_stratum_id,
            domain_variable=loss_variable,
            agi_lower_bound=float(record["AGI lower bound"]),
            agi_upper_bound=float(record["AGI upper bound"]),
        )

        # Provenance recorded alongside the target value.
        soi_table = record["SOI table"]
        source_year = int(record["Year"])
        xlsx_row = int(record["XLSX row"])
        provenance = (
            f"Publication 1304 {soi_table} taxable AGI-band "
            f"{source_variable} target "
            f"(source year {source_year}, row {xlsx_row})"
        )

        if bool(record["Count"]):
            # Return counts are stored as-is against the tax-unit count.
            variable = "tax_unit_count"
            value = float(record["Value"])
        else:
            # Amount rows are positive here (filtered above); negate them so
            # the target matches the "< 0" domain of the stratum.
            variable = loss_variable
            value = -float(record["Value"])

        _upsert_target(
            session,
            stratum_id=band_stratum.stratum_id,
            variable=variable,
            period=int(target_year),
            value=value,
            source="IRS SOI",
            notes=provenance,
        )
1315+
1316+
11251317
def load_national_workbook_soi_targets(
11261318
session: Session, national_filer_stratum_id: int, target_year: int
11271319
) -> None:
@@ -1721,6 +1913,16 @@ def load_soi_data(
17211913
filer_strata["national"],
17221914
target_year or national_year,
17231915
)
1916+
load_national_negative_agi_targets(
1917+
session,
1918+
filer_strata["national"],
1919+
target_year or national_year,
1920+
)
1921+
load_national_taxable_loss_agi_targets(
1922+
session,
1923+
filer_strata["national"],
1924+
target_year or national_year,
1925+
)
17241926
load_national_fine_agi_targets(session, filer_strata["national"], national_year)
17251927
load_national_ltcg_agi_targets(session, filer_strata["national"], national_year)
17261928

policyengine_us_data/utils/loss.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,19 @@ def _cbo_program_target_value(sim, variable_name: str, time_period):
288288
"taxable_interest_income",
289289
}
290290

291+
# SOI variables for which the all-return negative-AGI row is kept
# (see _is_negative_agi_all_returns_row).
SOI_NEGATIVE_AGI_TARGETED_VARIABLES = ("adjusted_gross_income", "count")

# SOI loss-component variables targeted by AGI band; _should_skip_soi_agi_row
# never skips rows for these variables.
AGI_LEVEL_LOSS_TARGETED_VARIABLES = (
    "business_net_losses",
    "capital_gains_losses",
    "estate_losses",
    "partnership_and_s_corp_losses",
    "rent_and_royalty_net_losses",
)
303+
291304
AGI_LEVEL_TARGETED_VARIABLES = (
292305
"adjusted_gross_income",
293306
"count",
@@ -1217,15 +1230,31 @@ def get_target_loss_weights(target_names):
12171230
return weights
12181231

12191232

1233+
def _is_negative_agi_all_returns_row(row) -> bool:
    """Tell whether ``row`` is the all-return SOI row for the negative-AGI band.

    A row qualifies when its variable is one of
    ``SOI_NEGATIVE_AGI_TARGETED_VARIABLES``, its filing status is ``"All"``,
    its AGI band is exactly ``(-inf, 0)``, and it is not limited to taxable
    returns.
    """
    if row["Variable"] not in SOI_NEGATIVE_AGI_TARGETED_VARIABLES:
        return False
    if row["Filing status"] != "All":
        return False

    covers_negative_band = (
        row["AGI lower bound"] == -np.inf and row["AGI upper bound"] == 0
    )
    if not covers_negative_band:
        return False

    return not row["Taxable only"]
1241+
1242+
12201243
def _should_skip_soi_agi_row(row) -> bool:
    """Decide whether an SOI row is dropped for being a fragile low-AGI row.

    Rows are always kept when they are the all-return negative-AGI row, when
    they belong to a loss-component variable, or when the AGI upper bound is
    above 10,000. Any remaining low-AGI row is kept only for the
    investment-income variables and skipped otherwise.
    """
    variable = row["Variable"]
    always_kept = (
        _is_negative_agi_all_returns_row(row)
        or variable in AGI_LEVEL_LOSS_TARGETED_VARIABLES
        or row["AGI upper bound"] > 10_000
    )
    if always_kept:
        return False
    return variable not in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES
12251252

12261253

12271254
def _should_skip_soi_taxability_row(row) -> bool:
    """Decide whether an SOI row is dropped because of its taxable-only flag.

    The all-return negative-AGI row is always kept. For investment-income
    variables the taxable-only rows are skipped (all-return rows are used);
    for every other variable the all-return rows are skipped (taxable-only
    rows are used).
    """
    if _is_negative_agi_all_returns_row(row):
        return False

    taxable_only = row["Taxable only"]
    uses_all_return_rows = row["Variable"] in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES
    return taxable_only if uses_all_return_rows else not taxable_only
@@ -1244,8 +1273,14 @@ def build_loss_matrix(dataset: type, time_period):
12441273
for variable in AGGREGATE_LEVEL_TARGETED_VARIABLES
12451274
if variable in df.columns
12461275
]
1276+
agi_level_loss_targeted_variables = [
1277+
variable
1278+
for variable in AGI_LEVEL_LOSS_TARGETED_VARIABLES
1279+
if variable in df.columns
1280+
]
12471281
soi_subset = soi_subset[
12481282
soi_subset.Variable.isin(AGI_LEVEL_TARGETED_VARIABLES)
1283+
| soi_subset.Variable.isin(agi_level_loss_targeted_variables)
12491284
| (
12501285
soi_subset.Variable.isin(aggregate_level_targeted_variables)
12511286
& (soi_subset["AGI lower bound"] == -np.inf)
@@ -1259,6 +1294,9 @@ def build_loss_matrix(dataset: type, time_period):
12591294
if _should_skip_soi_agi_row(row):
12601295
continue
12611296

1297+
if row["Variable"] in AGI_LEVEL_LOSS_TARGETED_VARIABLES and row["Value"] <= 0:
1298+
continue
1299+
12621300
mask = (
12631301
(agi >= row["AGI lower bound"]) * (agi < row["AGI upper bound"]) * filer
12641302
) > 0
@@ -1587,19 +1625,6 @@ def build_loss_matrix(dataset: type, time_period):
15871625
time_period,
15881626
)
15891627

1590-
# Negative household market income total rough estimate from the IRS SOI PUF
1591-
1592-
market_income = sim.calculate("household_market_income").values
1593-
loss_matrix["nation/irs/negative_household_market_income_total"] = market_income * (
1594-
market_income < 0
1595-
)
1596-
targets_array.append(-138e9)
1597-
1598-
loss_matrix["nation/irs/negative_household_market_income_count"] = (
1599-
market_income < 0
1600-
).astype(float)
1601-
targets_array.append(3e6)
1602-
16031628
# Healthcare spending by age.
16041629
# Each row targets a decade of ages (lower_bound to lower_bound + 9).
16051630
# The top row is treated as unbounded (age >= lower_bound) so the

0 commit comments

Comments
 (0)