From 21c852a73ca5dd3a20fdc821137017cedbe2e844 Mon Sep 17 00:00:00 2001
From: Max Ghenis
Date: Wed, 27 May 2026 21:48:15 -0400
Subject: [PATCH] Map state broad SOI Arch targets

---
 src/microplex_us/targets/arch.py | 37 ++++++++++++++
 tests/targets/test_arch_facts.py | 87 ++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)

diff --git a/src/microplex_us/targets/arch.py b/src/microplex_us/targets/arch.py
index 0b6a7fa..487eb8a 100644
--- a/src/microplex_us/targets/arch.py
+++ b/src/microplex_us/targets/arch.py
@@ -123,6 +123,7 @@
     "investment_interest_paid_amount": "investment_interest_expense",
     "interest_paid_deduction_amount": "interest_deduction",
     "medical_amount": "medical_expense_deduction",
+    "medical_dental_expense_amount": "medical_expense_deduction",
     "real_estate_taxes_amount": "real_estate_taxes",
     "aca_aptc_amount": "aca_ptc",
     "medicaid_benefits": "medicaid",
@@ -324,12 +325,35 @@
         "COUNT",
     ),
     "irs_soi.ordinary_dividends": ("ordinary_dividends_amount", "AMOUNT"),
+    "irs_soi.returns_with_qualified_dividends": (
+        "qualified_dividends_returns",
+        "COUNT",
+    ),
+    "irs_soi.qualified_dividends": ("qualified_dividends_amount", "AMOUNT"),
     "irs_soi.returns_with_taxable_interest": (
         "taxable_interest_returns",
         "COUNT",
     ),
     "irs_soi.taxable_interest": ("taxable_interest_amount", "AMOUNT"),
+    "irs_soi.returns_with_tax_exempt_interest": (
+        "tax_exempt_interest_returns",
+        "COUNT",
+    ),
+    "irs_soi.tax_exempt_interest": ("tax_exempt_interest_amount", "AMOUNT"),
+    "irs_soi.returns_with_schedule_c_income": (
+        "schedule_c_income_returns",
+        "COUNT",
+    ),
+    "irs_soi.schedule_c_income": ("schedule_c_income_amount", "AMOUNT"),
     "irs_soi.taxable_net_capital_gains": ("net_capital_gains_amount", "AMOUNT"),
+    "irs_soi.returns_with_partnership_scorp_income": (
+        "partnership_scorp_income_returns",
+        "COUNT",
+    ),
+    "irs_soi.partnership_scorp_income": (
+        "partnership_scorp_income_amount",
+        "AMOUNT",
+    ),
     "irs_soi.taxable_ira_distributions": (
         "taxable_ira_distributions_amount",
         "AMOUNT",
@@ -348,6 +372,14 @@
         "itemized_deductions_returns",
         "COUNT",
     ),
+    "irs_soi.returns_with_medical_dental_expense_deduction": (
+        "medical_claims",
+        "COUNT",
+    ),
+    "irs_soi.medical_dental_expense_deduction": (
+        "medical_dental_expense_amount",
+        "AMOUNT",
+    ),
     "irs_soi.standard_deduction": ("standard_deduction", "AMOUNT"),
     "irs_soi.taxable_income": ("taxable_income", "AMOUNT"),
     "irs_soi.total_income": ("total_income", "AMOUNT"),
@@ -761,11 +793,16 @@
     ),
     "taxable_social_security_amount": "Taxable Social Security benefits amount",
     "ordinary_dividends_amount": "Ordinary dividends amount",
+    "qualified_dividends_returns": "Returns with qualified dividends",
     "qualified_dividends_amount": "Qualified dividends amount",
     "long_term_capital_gains_amount": "Long-term capital gains amount",
     "short_term_capital_gains_amount": "Short-term capital gains amount",
+    "partnership_scorp_income_returns": "Returns with partnership and S-corp income",
     "partnership_scorp_income_amount": "Partnership and S-corp income amount",
+    "schedule_c_income_returns": "Returns with Schedule C income",
     "schedule_c_income_amount": "Schedule C income amount",
+    "medical_claims": "Returns with medical expense deduction",
+    "medical_dental_expense_amount": "Medical and dental expense amount",
     "tax_unit_count": "Tax unit count",
     "household_count": "Household count",
     "population": "Population count",
diff --git a/tests/targets/test_arch_facts.py b/tests/targets/test_arch_facts.py
index d7f2b1b..c230d46 100644
--- a/tests/targets/test_arch_facts.py
+++ b/tests/targets/test_arch_facts.py
@@ -1281,6 +1281,93 @@ def test_arch_consumer_fact_jsonl_provider_maps_state_soi_rows(
     }
 
 
+def test_arch_consumer_fact_jsonl_provider_maps_state_broad_soi_concepts(
+    tmp_path: Path,
+) -> None:
+    consumer_jsonl = tmp_path / "consumer_facts.jsonl"
+    geography = {"level": "state", "id": "0400000US06", "name": "California"}
+    rows = [
+        _consumer_fact(
+            "state-ca-qualified-dividends",
+            concept="irs_soi.qualified_dividends",
+            domain="all_individual_income_tax_returns",
+            source_name="irs_soi",
+            source_table="Historic Table 2 state broad totals",
+            period={"type": "tax_year", "value": 2022},
+            geography=geography,
+            value=93_000_000_000,
+            unit="usd",
+        ),
+        _consumer_fact(
+            "state-ca-schedule-c-returns",
+            concept="irs_soi.returns_with_schedule_c_income",
+            domain="all_individual_income_tax_returns",
+            source_name="irs_soi",
+            source_table="Historic Table 2 state broad totals",
+            period={"type": "tax_year", "value": 2022},
+            geography=geography,
+            value=3_617_080,
+        ),
+        _consumer_fact(
+            "state-ca-partnership-scorp",
+            concept="irs_soi.partnership_scorp_income",
+            domain="all_individual_income_tax_returns",
+            source_name="irs_soi",
+            source_table="Historic Table 2 state broad totals",
+            period={"type": "tax_year", "value": 2022},
+            geography=geography,
+            value=125_930_370_000,
+            unit="usd",
+        ),
+        _consumer_fact(
+            "state-ca-medical-dental",
+            concept="irs_soi.medical_dental_expense_deduction",
+            domain="all_individual_income_tax_returns",
+            source_name="irs_soi",
+            source_table="Historic Table 2 state broad totals",
+            period={"type": "tax_year", "value": 2022},
+            geography=geography,
+            value=11_456_144_000,
+            unit="usd",
+        ),
+    ]
+    consumer_jsonl.write_text(
+        "\n".join(json.dumps(row, sort_keys=True) for row in rows) + "\n"
+    )
+
+    target_set = ArchConsumerFactJSONLTargetProvider(consumer_jsonl).load_target_set(
+        TargetQuery(period=2022)
+    )
+    targets_by_arch_variable = {
+        target.metadata["arch_variable"]: target for target in target_set.targets
+    }
+
+    qualified_dividends = targets_by_arch_variable["qualified_dividends_amount"]
+    assert qualified_dividends.metadata["variable"] == "qualified_dividend_income"
+    assert qualified_dividends.measure == "qualified_dividend_income"
+    assert _target_filter_tuples(qualified_dividends) == {
+        ("tax_unit_is_filer", "==", "1"),
+        ("state_fips", "==", "06"),
+    }
+
+    schedule_c_returns = targets_by_arch_variable["schedule_c_income_returns"]
+    assert schedule_c_returns.metadata["variable"] == "self_employment_income"
+    assert schedule_c_returns.aggregation.value == "count"
+    assert ("self_employment_income", ">", "0") in _target_filter_tuples(
+        schedule_c_returns
+    )
+
+    partnership = targets_by_arch_variable["partnership_scorp_income_amount"]
+    assert (
+        partnership.metadata["variable"] == "tax_unit_partnership_s_corp_income"
+    )
+    assert partnership.measure == "tax_unit_partnership_s_corp_income"
+
+    medical = targets_by_arch_variable["medical_dental_expense_amount"]
+    assert medical.metadata["variable"] == "medical_expense_deduction"
+    assert medical.measure == "medical_expense_deduction"
+
+
 def test_arch_consumer_fact_jsonl_provider_maps_eitc_by_agi_and_children(
     tmp_path: Path,
 ) -> None: