diff --git a/changelog.d/1141.changed.md b/changelog.d/1141.changed.md new file mode 100644 index 000000000..42266bcae --- /dev/null +++ b/changelog.d/1141.changed.md @@ -0,0 +1 @@ +Replace manually curated negative income calibration targets with source-backed SOI negative AGI and loss-component controls. diff --git a/paper/sections/methodology/loss_matrix.tex b/paper/sections/methodology/loss_matrix.tex index 23577ad28..080b4ac68 100644 --- a/paper/sections/methodology/loss_matrix.tex +++ b/paper/sections/methodology/loss_matrix.tex @@ -89,12 +89,12 @@ \subsubsection{CPS-Derived Statistics} \item Rent: \$735B \end{itemize} -\subsubsection{Market Income Targets} +\subsubsection{Negative AGI and Loss Component Targets} -From IRS SOI PUF estimates: +From IRS SOI Publication 1304 tables: \begin{itemize} - \item Total negative household market income: -\$138B - \item Count of households with negative market income: 3M + \item All-return negative AGI amount and return count + \item Taxable-return AGI-bin targets for positive-valued business, capital gains, estate, partnership/S-corp, and rent/royalty loss components \end{itemize} \subsubsection{Healthcare Spending by Age} @@ -150,4 +150,4 @@ \subsubsection{Target Validation} \item Consistent uprating factors applied across related targets \end{itemize} -The resulting 7,000+ targets provide comprehensive coverage of income distributions, program participation, demographic patterns, and tax expenditure utilization, ensuring the enhanced dataset accurately reflects the complexity of the US tax and benefit system. The majority of targets come from IRS Statistics of Income data (over 5,300 targets), supplemented by state-level demographic and program participation data (over 1,700 targets). \ No newline at end of file +The resulting 7,000+ targets provide comprehensive coverage of income distributions, program participation, demographic patterns, and tax expenditure utilization, ensuring the enhanced dataset accurately reflects the complexity of the US tax and benefit system. The majority of targets come from IRS Statistics of Income data (over 5,300 targets), supplemented by state-level demographic and program participation data (over 1,700 targets). diff --git a/policyengine_us_data/calibration/target_config.yaml b/policyengine_us_data/calibration/target_config.yaml index 69c413d66..8e2835213 100644 --- a/policyengine_us_data/calibration/target_config.yaml +++ b/policyengine_us_data/calibration/target_config.yaml @@ -313,6 +313,28 @@ include: - variable: tax_unit_count geo_level: national domain_variable: adjusted_gross_income,taxable_interest_income + # SOI AGI-binned loss-component targets. These replace rough manually + # curated negative-income controls with source-backed component constraints. + # Include the loss components that have tax-unit-level PolicyEngine + # variables, so the DB matrix matches SOI return-level netting. + - variable: loss_limited_net_capital_gains + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,loss_limited_net_capital_gains + - variable: tax_unit_count + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,loss_limited_net_capital_gains + - variable: tax_unit_partnership_s_corp_income + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_partnership_s_corp_income + - variable: tax_unit_count + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_partnership_s_corp_income + - variable: tax_unit_rental_income + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_rental_income + - variable: tax_unit_count + geo_level: national + domain_variable: adjusted_gross_income,income_tax_before_credits,tax_unit_rental_income - variable: tax_exempt_interest_income geo_level: national domain_variable: tax_exempt_interest_income diff --git a/policyengine_us_data/db/etl_irs_soi.py b/policyengine_us_data/db/etl_irs_soi.py index c74dfce15..b0fce4c57 100644 --- a/policyengine_us_data/db/etl_irs_soi.py +++ b/policyengine_us_data/db/etl_irs_soi.py @@ -204,11 +204,17 @@ def _skip_coarse_state_agi_person_count_target(geo_type: str, agi_stub: int) -> "adjusted_gross_income": "adjusted_gross_income", "count": "tax_unit_count", } +SOI_NEGATIVE_AGI_TARGET_VARIABLES = dict(SOI_TAXABLE_AGI_TARGET_VARIABLES) SOI_TAXABLE_AGI_DOMAIN_TARGET_VARIABLES = { "employment_income": "irs_employment_income", "total_pension_income": "pension_income", "total_social_security": "social_security", } +SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES = { + "capital_gains_losses": "loss_limited_net_capital_gains", + "partnership_and_s_corp_losses": "tax_unit_partnership_s_corp_income", + "rent_and_royalty_net_losses": "tax_unit_rental_income", +} SOI_FILING_STATUS_CONSTRAINTS = { "Single": ("==", "SINGLE"), "Head of Household": ("==", "HEAD_OF_HOUSEHOLD"), @@ -694,6 +700,110 @@ def _get_or_create_national_agi_domain_stratum( return stratum +def _get_or_create_national_agi_stratum( + session: Session, + national_filer_stratum_id: int, + *, + agi_lower_bound: float, + agi_upper_bound: float, +) -> Stratum: + note = f"National filers, AGI >= {agi_lower_bound}, AGI < {agi_upper_bound}" + stratum = session.exec( + select(Stratum).where( + Stratum.parent_stratum_id == national_filer_stratum_id, + Stratum.notes == note, + ) + ).first() + if stratum: + return stratum + + stratum = Stratum( + parent_stratum_id=national_filer_stratum_id, + notes=note, + ) + stratum.constraints_rel.extend( + [ + StratumConstraint( + constraint_variable="tax_unit_is_filer", + operation="==", + value="1", + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation=">=", + value=str(agi_lower_bound), + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation="<", + value=str(agi_upper_bound), + ), + ] + ) + session.add(stratum) + session.flush() + return stratum + + +def _get_or_create_national_taxable_agi_negative_domain_stratum( + session: Session, + national_filer_stratum_id: int, + *, + domain_variable: str, + agi_lower_bound: float, + agi_upper_bound: float, +) -> Stratum: + note = ( + "National taxable filers, AGI >= " + f"{agi_lower_bound}, AGI < {agi_upper_bound}, {domain_variable} < 0" + ) + stratum = session.exec( + select(Stratum).where( + Stratum.parent_stratum_id == national_filer_stratum_id, + Stratum.notes == note, + ) + ).first() + if stratum: + return stratum + + stratum = Stratum( + parent_stratum_id=national_filer_stratum_id, + notes=note, + ) + stratum.constraints_rel.extend( + [ + StratumConstraint( + constraint_variable="tax_unit_is_filer", + operation="==", + value="1", + ), + StratumConstraint( + constraint_variable="income_tax_before_credits", + operation=">", + value="0", + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation=">=", + value=str(agi_lower_bound), + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation="<", + value=str(agi_upper_bound), + ), + StratumConstraint( + constraint_variable=domain_variable, + operation="<", + value="0", + ), + ] + ) + session.add(stratum) + session.flush() + return stratum + + def _get_or_create_national_eitc_agi_child_stratum( session: Session, national_filer_stratum_id: int, @@ -1122,6 +1232,86 @@ def load_national_taxable_agi_domain_filing_status_targets( ) +def load_national_negative_agi_targets( + session: Session, + national_filer_stratum_id: int, + target_year: int, +) -> None: + """Create all-return negative-AGI amount and count targets.""" + soi = get_soi(target_year) + rows = soi[ + soi["Variable"].isin(SOI_NEGATIVE_AGI_TARGET_VARIABLES) + & (soi["Filing status"] == "All") + & (soi["AGI lower bound"] == -np.inf) + & (soi["AGI upper bound"] == 0) + & (~soi["Taxable only"]) + ].copy() + + for _, row in rows.iterrows(): + source_variable = row["Variable"] + target_variable = SOI_NEGATIVE_AGI_TARGET_VARIABLES[source_variable] + stratum = _get_or_create_national_agi_stratum( + session, + national_filer_stratum_id, + agi_lower_bound=float(row["AGI lower bound"]), + agi_upper_bound=float(row["AGI upper bound"]), + ) + notes = ( + f"Publication 1304 {row['SOI table']} all-return negative-AGI " + f"target (source year {int(row['Year'])}, row {int(row['XLSX row'])})" + ) + _upsert_target( + session, + stratum_id=stratum.stratum_id, + variable=target_variable, + period=int(target_year), + value=float(row["Value"]), + source="IRS SOI", + notes=notes, + ) + + +def load_national_taxable_loss_agi_targets( + session: Session, + national_filer_stratum_id: int, + target_year: int, +) -> None: + """Create taxable loss-component targets by AGI band.""" + soi = get_soi(target_year) + rows = soi[ + soi["Variable"].isin(SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES) + & (soi["Filing status"] == "All") + & (soi["Taxable only"]) + & (~soi["Full population"]) + & (soi["Value"] > 0) + ].copy() + + for _, row in rows.iterrows(): + source_variable = row["Variable"] + target_variable = SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES[source_variable] + stratum = _get_or_create_national_taxable_agi_negative_domain_stratum( + session, + national_filer_stratum_id, + domain_variable=target_variable, + agi_lower_bound=float(row["AGI lower bound"]), + agi_upper_bound=float(row["AGI upper bound"]), + ) + notes = ( + f"Publication 1304 {row['SOI table']} taxable AGI-band " + f"{source_variable} target " + f"(source year {int(row['Year'])}, row {int(row['XLSX row'])})" + ) + _upsert_target( + session, + stratum_id=stratum.stratum_id, + variable="tax_unit_count" if bool(row["Count"]) else target_variable, + period=int(target_year), + value=(float(row["Value"]) if bool(row["Count"]) else -float(row["Value"])), + source="IRS SOI", + notes=notes, + ) + + def load_national_workbook_soi_targets( session: Session, national_filer_stratum_id: int, target_year: int ) -> None: @@ -1721,6 +1911,16 @@ def load_soi_data( filer_strata["national"], target_year or national_year, ) + load_national_negative_agi_targets( + session, + filer_strata["national"], + target_year or national_year, + ) + load_national_taxable_loss_agi_targets( + session, + filer_strata["national"], + target_year or national_year, + ) load_national_fine_agi_targets(session, filer_strata["national"], national_year) load_national_ltcg_agi_targets(session, filer_strata["national"], national_year) diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 84c620bd3..54fb4bc9e 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -288,6 +288,19 @@ def _cbo_program_target_value(sim, variable_name: str, time_period): "taxable_interest_income", } +SOI_NEGATIVE_AGI_TARGETED_VARIABLES = ( + "adjusted_gross_income", + "count", +) + +AGI_LEVEL_LOSS_TARGETED_VARIABLES = ( + "business_net_losses", + "capital_gains_losses", + "estate_losses", + "partnership_and_s_corp_losses", + "rent_and_royalty_net_losses", +) + AGI_LEVEL_TARGETED_VARIABLES = ( "adjusted_gross_income", "count", @@ -1217,8 +1230,22 @@ def get_target_loss_weights(target_names): return weights +def _is_negative_agi_all_returns_row(row) -> bool: + return ( + row["Variable"] in SOI_NEGATIVE_AGI_TARGETED_VARIABLES + and row["Filing status"] == "All" + and row["AGI lower bound"] == -np.inf + and row["AGI upper bound"] == 0 + and not row["Taxable only"] + ) + + def _should_skip_soi_agi_row(row) -> bool: - """Skip fragile low-AGI SOI rows except for investment-income controls.""" + """Skip fragile low-AGI SOI rows except selected source-backed controls.""" + if _is_negative_agi_all_returns_row(row): + return False + if row["Variable"] in AGI_LEVEL_LOSS_TARGETED_VARIABLES: + return False if row["AGI upper bound"] > 10_000: return False return row["Variable"] not in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES @@ -1226,6 +1253,8 @@ def _should_skip_soi_agi_row(row) -> bool: def _should_skip_soi_taxability_row(row) -> bool: """Use all-return SOI rows only for investment-income controls.""" + if _is_negative_agi_all_returns_row(row): + return False if row["Variable"] in LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES: return row["Taxable only"] return not row["Taxable only"] @@ -1244,8 +1273,14 @@ def build_loss_matrix(dataset: type, time_period): for variable in AGGREGATE_LEVEL_TARGETED_VARIABLES if variable in df.columns ] + agi_level_loss_targeted_variables = [ + variable + for variable in AGI_LEVEL_LOSS_TARGETED_VARIABLES + if variable in df.columns + ] soi_subset = soi_subset[ soi_subset.Variable.isin(AGI_LEVEL_TARGETED_VARIABLES) + | soi_subset.Variable.isin(agi_level_loss_targeted_variables) | ( soi_subset.Variable.isin(aggregate_level_targeted_variables) & (soi_subset["AGI lower bound"] == -np.inf) @@ -1259,6 +1294,9 @@ def build_loss_matrix(dataset: type, time_period): if _should_skip_soi_agi_row(row): continue + if row["Variable"] in AGI_LEVEL_LOSS_TARGETED_VARIABLES and row["Value"] <= 0: + continue + mask = ( (agi >= row["AGI lower bound"]) * (agi < row["AGI upper bound"]) * filer ) > 0 @@ -1587,19 +1625,6 @@ def build_loss_matrix(dataset: type, time_period): time_period, ) - # Negative household market income total rough estimate from the IRS SOI PUF - - market_income = sim.calculate("household_market_income").values - loss_matrix["nation/irs/negative_household_market_income_total"] = market_income * ( - market_income < 0 - ) - targets_array.append(-138e9) - - loss_matrix["nation/irs/negative_household_market_income_count"] = ( - market_income < 0 - ).astype(float) - targets_array.append(3e6) - # Healthcare spending by age. # Each row targets a decade of ages (lower_bound to lower_bound + 9). # The top row is treated as unbounded (age >= lower_bound) so the diff --git a/pyproject.toml b/pyproject.toml index 6302df0c3..0ff9663b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,11 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - # Temporary GitHub pin: policyengine-us 1.706.14 is blocked from PyPI by - # the project-size limit, but us-data needs the merged desired retirement - # contribution variables, FLSA overtime constants, and data-backed - # Medicaid cost input before the next PyPI release is available. - "policyengine-us @ git+https://github.com/PolicyEngine/policyengine-us.git@1da04a64dcdce26834b063d68daa835765a5d8ed", + "policyengine-us==1.709.1", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index a40b81046..8c7920773 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -9,6 +9,7 @@ ABSOLUTE_ERROR_SCALE_TARGETS, AGE_BUCKETED_HEALTH_TARGETS, AGGREGATE_LEVEL_TARGETED_VARIABLES, + AGI_LEVEL_LOSS_TARGETED_VARIABLES, AGI_LEVEL_TARGETED_VARIABLES, BEA_NIPA_DIRECT_SUM_TARGETS, BEA_NIPA_DIRECT_SUM_LOSS_WEIGHT, @@ -16,6 +17,7 @@ BLS_CE_TOTALS, HARD_CODED_TOTALS, LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES, + SOI_NEGATIVE_AGI_TARGETED_VARIABLES, TRANSFER_BALANCE_TARGETS, _add_bea_state_wage_targets, _add_agi_metric_columns, @@ -75,6 +77,57 @@ def test_legacy_loss_targets_include_ltcg_agi_grid(): assert top_bracket["Value"].iat[0] == 346_272_458_000 +def test_legacy_loss_targets_include_soi_loss_agi_grid(): + assert AGI_LEVEL_LOSS_TARGETED_VARIABLES == ( + "business_net_losses", + "capital_gains_losses", + "estate_losses", + "partnership_and_s_corp_losses", + "rent_and_royalty_net_losses", + ) + + soi = pd.read_csv(CALIBRATION_FOLDER / "soi_targets.csv") + loss_rows = soi[ + soi["Variable"].isin(AGI_LEVEL_LOSS_TARGETED_VARIABLES) + & (soi["SOI table"] == "Table 1.4") + & (soi["Filing status"] == "All") + & (soi["Taxable only"]) + & (~soi["Full population"]) + ] + + assert set(loss_rows["Variable"]) == set(AGI_LEVEL_LOSS_TARGETED_VARIABLES) + assert loss_rows.groupby(["Variable", "Count"]).size().min() >= 19 + assert loss_rows["Value"].ge(0).all() + assert ( + loss_rows[loss_rows["Value"] > 0].groupby(["Variable", "Count"]).size().min() + >= 1 + ) + + +def test_legacy_loss_targets_include_soi_negative_agi_controls(): + assert SOI_NEGATIVE_AGI_TARGETED_VARIABLES == ( + "adjusted_gross_income", + "count", + ) + + soi = pd.read_csv(CALIBRATION_FOLDER / "soi_targets.csv") + negative_agi = soi[ + soi["Variable"].isin(SOI_NEGATIVE_AGI_TARGETED_VARIABLES) + & (soi["SOI table"] == "Table 1.1") + & (soi["Filing status"] == "All") + & (soi["AGI lower bound"] == -np.inf) + & (soi["AGI upper bound"] == 0) + & (~soi["Taxable only"]) + ] + + assert set(negative_agi["Variable"]) == set(SOI_NEGATIVE_AGI_TARGETED_VARIABLES) + latest_negative_agi = negative_agi[ + negative_agi["Year"] == negative_agi["Year"].max() + ].set_index("Variable") + assert latest_negative_agi.loc["adjusted_gross_income", "Value"] < 0 + assert latest_negative_agi.loc["count", "Value"] > 0 + + def test_bea_nipa_direct_sum_targets_match_targets_db(): loss_targets_by_variable = { variable: target for _, variable, target in BEA_NIPA_DIRECT_SUM_TARGETS @@ -797,6 +850,18 @@ def test_low_agi_soi_skip_keeps_investment_income_targets(): ltcg_low_agi_row = pd.Series( {"Variable": "long_term_capital_gains", "AGI upper bound": 10_000.0} ) + loss_low_agi_row = pd.Series( + {"Variable": "partnership_and_s_corp_losses", "AGI upper bound": 10_000.0} + ) + negative_agi_all_return_row = pd.Series( + { + "Variable": "adjusted_gross_income", + "Filing status": "All", + "AGI lower bound": -np.inf, + "AGI upper bound": 0.0, + "Taxable only": False, + } + ) ordinary_higher_agi_row = pd.Series( {"Variable": "employment_income", "AGI upper bound": 25_000.0} ) @@ -804,6 +869,8 @@ def test_low_agi_soi_skip_keeps_investment_income_targets(): assert _should_skip_soi_agi_row(ordinary_low_agi_row) assert not _should_skip_soi_agi_row(capital_income_low_agi_row) assert not _should_skip_soi_agi_row(ltcg_low_agi_row) + assert not _should_skip_soi_agi_row(loss_low_agi_row) + assert not _should_skip_soi_agi_row(negative_agi_all_return_row) assert not _should_skip_soi_agi_row(ordinary_higher_agi_row) @@ -817,6 +884,15 @@ def test_all_return_soi_skip_keeps_investment_income_targets(): ltcg_all_return_row = pd.Series( {"Variable": "long_term_capital_gains", "Taxable only": False} ) + negative_agi_all_return_row = pd.Series( + { + "Variable": "adjusted_gross_income", + "Filing status": "All", + "AGI lower bound": -np.inf, + "AGI upper bound": 0.0, + "Taxable only": False, + } + ) ordinary_taxable_row = pd.Series( {"Variable": "employment_income", "Taxable only": True} ) @@ -836,6 +912,7 @@ def test_all_return_soi_skip_keeps_investment_income_targets(): assert _should_skip_soi_taxability_row(ordinary_all_return_row) assert not _should_skip_soi_taxability_row(capital_income_all_return_row) assert not _should_skip_soi_taxability_row(ltcg_all_return_row) + assert not _should_skip_soi_taxability_row(negative_agi_all_return_row) assert not _should_skip_soi_taxability_row(ordinary_taxable_row) assert not _should_skip_soi_taxability_row(qbi_taxable_row) assert _should_skip_soi_taxability_row(capital_income_taxable_row) @@ -878,6 +955,13 @@ def test_national_loss_excludes_survey_spm_threshold_decile_targets(): assert "count_in_spm_threshold_decile" not in source +def test_national_loss_excludes_manual_negative_household_market_income_targets(): + source = inspect.getsource(build_loss_matrix) + + assert "negative_household_market_income" not in source + assert "-138e9" not in source + + def test_add_medicare_enrollment_target(monkeypatch): monkeypatch.setattr( "policyengine_us_data.utils.loss.get_medicare_enrollment_target", diff --git a/tests/unit/calibration/test_target_config.py b/tests/unit/calibration/test_target_config.py index fe7b3055e..09203f745 100644 --- a/tests/unit/calibration/test_target_config.py +++ b/tests/unit/calibration/test_target_config.py @@ -402,6 +402,50 @@ def test_training_config_includes_soi_ltcg_target(self): "geo_level": "national", } in config["include"] + def test_training_config_includes_soi_loss_component_agi_targets(self): + config = load_target_config( + str( + Path(__file__).resolve().parents[3] + / "policyengine_us_data" + / "calibration" + / "target_config.yaml" + ) + ) + + include_rules = config["include"] + for variable in [ + "loss_limited_net_capital_gains", + "tax_unit_partnership_s_corp_income", + "tax_unit_rental_income", + ]: + assert { + "variable": variable, + "geo_level": "national", + "domain_variable": ( + f"adjusted_gross_income,income_tax_before_credits,{variable}" + ), + } in include_rules + assert { + "variable": "tax_unit_count", + "geo_level": "national", + "domain_variable": ( + f"adjusted_gross_income,income_tax_before_credits,{variable}" + ), + } in include_rules + + for person_level_loss_variable in [ + "total_self_employment_income", + "estate_income", + ]: + assert { + "variable": person_level_loss_variable, + "geo_level": "national", + "domain_variable": ( + "adjusted_gross_income,income_tax_before_credits," + f"{person_level_loss_variable}" + ), + } not in include_rules + def test_training_config_excludes_national_undifferentiated_eitc_target(self): config = load_target_config( str( diff --git a/tests/unit/test_etl_irs_soi_overlay.py b/tests/unit/test_etl_irs_soi_overlay.py index 8f80a04a3..25a9ee2e0 100644 --- a/tests/unit/test_etl_irs_soi_overlay.py +++ b/tests/unit/test_etl_irs_soi_overlay.py @@ -14,6 +14,7 @@ ) from policyengine_us_data.db.etl_irs_soi import ( GEOGRAPHY_FILE_TARGET_SPECS, + SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES, WORKBOOK_NATIONAL_DOMAIN_TARGETS, get_geography_soi_year, get_national_geography_soi_agi_targets, @@ -26,8 +27,10 @@ load_national_geography_ctc_agi_targets, load_national_geography_ctc_targets, load_national_ltcg_agi_targets, + load_national_negative_agi_targets, load_national_taxable_agi_domain_filing_status_targets, load_national_taxable_agi_filing_status_targets, + load_national_taxable_loss_agi_targets, load_national_workbook_soi_targets, load_state_eitc_claim_count_targets, ) @@ -1023,6 +1026,245 @@ def fake_get_soi(year: int) -> pd.DataFrame: ) in constraint_set +def test_load_national_negative_agi_targets_creates_all_return_rows( + monkeypatch, tmp_path +): + db_uri, engine = _create_test_engine(tmp_path) + soi_rows = pd.DataFrame( + [ + { + "Year": 2023, + "SOI table": "Table 1.1", + "XLSX column": "C", + "XLSX row": 10, + "Variable": "adjusted_gross_income", + "Filing status": "All", + "AGI lower bound": -np.inf, + "AGI upper bound": 0.0, + "Count": False, + "Taxable only": False, + "Full population": False, + "Value": -1_000_000.0, + }, + { + "Year": 2023, + "SOI table": "Table 1.1", + "XLSX column": "B", + "XLSX row": 10, + "Variable": "count", + "Filing status": "All", + "AGI lower bound": -np.inf, + "AGI upper bound": 0.0, + "Count": True, + "Taxable only": False, + "Full population": False, + "Value": 20_000.0, + }, + { + "Year": 2023, + "SOI table": "Table 1.1", + "XLSX column": "C", + "XLSX row": 10, + "Variable": "adjusted_gross_income", + "Filing status": "All", + "AGI lower bound": -np.inf, + "AGI upper bound": 0.0, + "Count": False, + "Taxable only": True, + "Full population": False, + "Value": -999.0, + }, + ] + ) + + monkeypatch.setattr( + "policyengine_us_data.db.etl_irs_soi.get_soi", + lambda year: soi_rows, + ) + + with Session(engine) as session: + national_filer_stratum = _create_national_filer_stratum(session) + load_national_negative_agi_targets( + session, + national_filer_stratum.stratum_id, + target_year=2024, + ) + session.commit() + + builder = UnifiedMatrixBuilder(db_uri=db_uri, time_period=2024) + rows = builder._query_targets( + { + "variables": ["adjusted_gross_income", "tax_unit_count"], + "domain_variables": ["adjusted_gross_income"], + } + ) + + assert set(rows["variable"]) == {"adjusted_gross_income", "tax_unit_count"} + assert ( + rows.set_index("variable").loc["adjusted_gross_income", "value"] == -1_000_000.0 + ) + assert rows.set_index("variable").loc["tax_unit_count", "value"] == 20_000.0 + + with engine.connect() as conn: + constraints = conn.execute( + text( + """ + SELECT tv.variable, sc.constraint_variable, sc.operation, sc.value + FROM target_overview tv + JOIN stratum_constraints sc ON tv.stratum_id = sc.stratum_id + ORDER BY tv.variable, sc.constraint_variable, sc.operation + """ + ) + ).fetchall() + + constraint_set = { + (target_variable, variable, operation, constraint_value) + for target_variable, variable, operation, constraint_value in constraints + } + assert ( + "adjusted_gross_income", + "income_tax_before_credits", + ">", + "0", + ) not in constraint_set + assert ( + "adjusted_gross_income", + "adjusted_gross_income", + ">=", + "-inf", + ) in constraint_set + assert ( + "tax_unit_count", + "adjusted_gross_income", + "<", + "0.0", + ) in constraint_set + + +def test_load_national_taxable_loss_agi_targets_creates_negative_value_rows( + monkeypatch, tmp_path +): + db_uri, engine = _create_test_engine(tmp_path) + soi_rows = pd.DataFrame( + [ + { + "Year": 2023, + "SOI table": "Table 1.4", + "XLSX column": "AS", + "XLSX row": 21, + "Variable": "partnership_and_s_corp_losses", + "Filing status": "All", + "AGI lower bound": 100_000.0, + "AGI upper bound": 200_000.0, + "Count": False, + "Taxable only": True, + "Full population": False, + "Value": 5_000_000.0, + }, + { + "Year": 2023, + "SOI table": "Table 1.4", + "XLSX column": "AR", + "XLSX row": 21, + "Variable": "partnership_and_s_corp_losses", + "Filing status": "All", + "AGI lower bound": 100_000.0, + "AGI upper bound": 200_000.0, + "Count": True, + "Taxable only": True, + "Full population": False, + "Value": 7_000.0, + }, + { + "Year": 2023, + "SOI table": "Table 1.4", + "XLSX column": "AS", + "XLSX row": 21, + "Variable": "partnership_and_s_corp_losses", + "Filing status": "All", + "AGI lower bound": 100_000.0, + "AGI upper bound": 200_000.0, + "Count": False, + "Taxable only": False, + "Full population": False, + "Value": 999.0, + }, + ] + ) + + monkeypatch.setattr( + "policyengine_us_data.db.etl_irs_soi.get_soi", + lambda year: soi_rows, + ) + + with Session(engine) as session: + national_filer_stratum = _create_national_filer_stratum(session) + load_national_taxable_loss_agi_targets( + session, + national_filer_stratum.stratum_id, + target_year=2024, + ) + session.commit() + + builder = UnifiedMatrixBuilder(db_uri=db_uri, time_period=2024) + rows = builder._query_targets( + { + "variables": ["tax_unit_partnership_s_corp_income", "tax_unit_count"], + "domain_variables": [ + "adjusted_gross_income,income_tax_before_credits,tax_unit_partnership_s_corp_income" + ], + } + ) + + assert set(rows["variable"]) == { + "tax_unit_partnership_s_corp_income", + "tax_unit_count", + } + assert ( + rows.set_index("variable").loc["tax_unit_partnership_s_corp_income", "value"] + == -5_000_000.0 + ) + assert rows.set_index("variable").loc["tax_unit_count", "value"] == 7_000.0 + + with engine.connect() as conn: + constraints = conn.execute( + text( + """ + SELECT tv.variable, sc.constraint_variable, sc.operation, sc.value + FROM target_overview tv + JOIN stratum_constraints sc ON tv.stratum_id = sc.stratum_id + WHERE tv.variable = 'tax_unit_partnership_s_corp_income' + ORDER BY sc.constraint_variable + """ + ) + ).fetchall() + + constraint_set = { + (target_variable, variable, operation, constraint_value) + for target_variable, variable, operation, constraint_value in constraints + } + assert ( + "tax_unit_partnership_s_corp_income", + "income_tax_before_credits", + ">", + "0", + ) in constraint_set + assert ( + "tax_unit_partnership_s_corp_income", + "tax_unit_partnership_s_corp_income", + "<", + "0", + ) in constraint_set + + +def test_taxable_loss_agi_db_targets_use_tax_unit_safe_variables(): + assert SOI_TAXABLE_LOSS_AGI_TARGET_VARIABLES == { + "capital_gains_losses": "loss_limited_net_capital_gains", + "partnership_and_s_corp_losses": "tax_unit_partnership_s_corp_income", + "rent_and_royalty_net_losses": "tax_unit_rental_income", + } + + def test_load_state_eitc_claim_count_targets_creates_state_rows(monkeypatch, tmp_path): db_uri, engine = _create_test_engine(tmp_path) calibration_dir = tmp_path / "calibration_targets" diff --git a/uv.lock b/uv.lock index 842ca0c20..e0e4ab23d 100644 --- a/uv.lock +++ b/uv.lock @@ -2164,8 +2164,8 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.706.14" -source = { git = "https://github.com/PolicyEngine/policyengine-us.git?rev=1da04a64dcdce26834b063d68daa835765a5d8ed#1da04a64dcdce26834b063d68daa835765a5d8ed" } +version = "1.709.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, { name = "pandas" }, @@ -2174,6 +2174,10 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/20/dc/068115100ff17bb7245e7237777c7efee926f8788420e49b6dcd8091c483/policyengine_us-1.709.1.tar.gz", hash = "sha256:66e4f09629fcca1fd9094e6211d0ee1e514c4f00ce55addca5d7063794963384", size = 9951226, upload-time = "2026-05-26T22:23:52.351Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/fc/4925a9e5d2a6f738d4c3c8252e6e52ece83c7da8e5cb8a66e98e8e80faa3/policyengine_us-1.709.1-py3-none-any.whl", hash = "sha256:9d26ff9a84a4f0c99cf9529fbfb6f07572c697010628fcd1f65fdb350cec40e6", size = 10870006, upload-time = "2026-05-26T22:23:49.08Z" }, +] [[package]] name = "policyengine-us-data" @@ -2242,7 +2246,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", git = "https://github.com/PolicyEngine/policyengine-us.git?rev=1da04a64dcdce26834b063d68daa835765a5d8ed" }, + { name = "policyengine-us", specifier = "==1.709.1" }, { name = "requests", specifier = ">=2.25.0" }, { name = "scipy", specifier = ">=1.15.3" }, { name = "setuptools", specifier = ">=60" },