Skip to content
This repository was archived by the owner on Jun 14, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/microplex_us/targets/arch.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
"investment_interest_paid_amount": "investment_interest_expense",
"interest_paid_deduction_amount": "interest_deduction",
"medical_amount": "medical_expense_deduction",
"medical_dental_expense_amount": "medical_expense_deduction",
"real_estate_taxes_amount": "real_estate_taxes",
"aca_aptc_amount": "aca_ptc",
"medicaid_benefits": "medicaid",
Expand Down Expand Up @@ -324,12 +325,35 @@
"COUNT",
),
"irs_soi.ordinary_dividends": ("ordinary_dividends_amount", "AMOUNT"),
"irs_soi.returns_with_qualified_dividends": (
"qualified_dividends_returns",
"COUNT",
),
"irs_soi.qualified_dividends": ("qualified_dividends_amount", "AMOUNT"),
"irs_soi.returns_with_taxable_interest": (
"taxable_interest_returns",
"COUNT",
),
"irs_soi.taxable_interest": ("taxable_interest_amount", "AMOUNT"),
"irs_soi.returns_with_tax_exempt_interest": (
"tax_exempt_interest_returns",
"COUNT",
),
"irs_soi.tax_exempt_interest": ("tax_exempt_interest_amount", "AMOUNT"),
"irs_soi.returns_with_schedule_c_income": (
"schedule_c_income_returns",
"COUNT",
),
"irs_soi.schedule_c_income": ("schedule_c_income_amount", "AMOUNT"),
"irs_soi.taxable_net_capital_gains": ("net_capital_gains_amount", "AMOUNT"),
"irs_soi.returns_with_partnership_scorp_income": (
"partnership_scorp_income_returns",
"COUNT",
),
"irs_soi.partnership_scorp_income": (
"partnership_scorp_income_amount",
"AMOUNT",
),
"irs_soi.taxable_ira_distributions": (
"taxable_ira_distributions_amount",
"AMOUNT",
Expand All @@ -348,6 +372,14 @@
"itemized_deductions_returns",
"COUNT",
),
"irs_soi.returns_with_medical_dental_expense_deduction": (
"medical_claims",
"COUNT",
),
"irs_soi.medical_dental_expense_deduction": (
"medical_dental_expense_amount",
"AMOUNT",
),
"irs_soi.standard_deduction": ("standard_deduction", "AMOUNT"),
"irs_soi.taxable_income": ("taxable_income", "AMOUNT"),
"irs_soi.total_income": ("total_income", "AMOUNT"),
Expand Down Expand Up @@ -761,11 +793,16 @@
),
"taxable_social_security_amount": "Taxable Social Security benefits amount",
"ordinary_dividends_amount": "Ordinary dividends amount",
"qualified_dividends_returns": "Returns with qualified dividends",
"qualified_dividends_amount": "Qualified dividends amount",
"long_term_capital_gains_amount": "Long-term capital gains amount",
"short_term_capital_gains_amount": "Short-term capital gains amount",
"partnership_scorp_income_returns": "Returns with partnership and S-corp income",
"partnership_scorp_income_amount": "Partnership and S-corp income amount",
"schedule_c_income_returns": "Returns with Schedule C income",
"schedule_c_income_amount": "Schedule C income amount",
"medical_claims": "Returns with medical expense deduction",
"medical_dental_expense_amount": "Medical and dental expense amount",
"tax_unit_count": "Tax unit count",
"household_count": "Household count",
"population": "Population count",
Expand Down
87 changes: 87 additions & 0 deletions tests/targets/test_arch_facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1281,6 +1281,93 @@ def test_arch_consumer_fact_jsonl_provider_maps_state_soi_rows(
}


def test_arch_consumer_fact_jsonl_provider_maps_state_broad_soi_concepts(
    tmp_path: Path,
) -> None:
    """Check that state-level broad SOI concepts resolve to the expected targets.

    Four California tax-year-2022 consumer facts (qualified dividends,
    Schedule C return count, partnership/S-corp income, and medical/dental
    expense deduction) are written to a JSONL file, loaded through
    ``ArchConsumerFactJSONLTargetProvider`` with ``TargetQuery(period=2022)``,
    and the resulting targets are looked up by their ``arch_variable``
    metadata to verify the mapped variable, measure, aggregation and filters.
    """
    # Keyword arguments that are identical for every fact in this fixture.
    shared_fields = {
        "domain": "all_individual_income_tax_returns",
        "source_name": "irs_soi",
        "source_table": "Historic Table 2 state broad totals",
        "period": {"type": "tax_year", "value": 2022},
        "geography": {"level": "state", "id": "0400000US06", "name": "California"},
    }
    facts = [
        _consumer_fact(
            "state-ca-qualified-dividends",
            concept="irs_soi.qualified_dividends",
            value=93_000_000_000,
            unit="usd",
            **shared_fields,
        ),
        # The returns-count fact is the only one passed without a ``unit``.
        _consumer_fact(
            "state-ca-schedule-c-returns",
            concept="irs_soi.returns_with_schedule_c_income",
            value=3_617_080,
            **shared_fields,
        ),
        _consumer_fact(
            "state-ca-partnership-scorp",
            concept="irs_soi.partnership_scorp_income",
            value=125_930_370_000,
            unit="usd",
            **shared_fields,
        ),
        _consumer_fact(
            "state-ca-medical-dental",
            concept="irs_soi.medical_dental_expense_deduction",
            value=11_456_144_000,
            unit="usd",
            **shared_fields,
        ),
    ]

    # One JSON document per line, each line newline-terminated.
    facts_path = tmp_path / "consumer_facts.jsonl"
    facts_path.write_text(
        "".join(f"{json.dumps(fact, sort_keys=True)}\n" for fact in facts)
    )

    provider = ArchConsumerFactJSONLTargetProvider(facts_path)
    loaded = provider.load_target_set(TargetQuery(period=2022))
    by_arch_variable = {}
    for target in loaded.targets:
        by_arch_variable[target.metadata["arch_variable"]] = target

    dividends = by_arch_variable["qualified_dividends_amount"]
    assert dividends.metadata["variable"] == "qualified_dividend_income"
    assert dividends.measure == "qualified_dividend_income"
    assert _target_filter_tuples(dividends) == {
        ("tax_unit_is_filer", "==", "1"),
        ("state_fips", "==", "06"),
    }

    schedule_c = by_arch_variable["schedule_c_income_returns"]
    assert schedule_c.metadata["variable"] == "self_employment_income"
    assert schedule_c.aggregation.value == "count"
    schedule_c_filters = _target_filter_tuples(schedule_c)
    assert ("self_employment_income", ">", "0") in schedule_c_filters

    partnership_scorp = by_arch_variable["partnership_scorp_income_amount"]
    expected_partnership_variable = "tax_unit_partnership_s_corp_income"
    assert partnership_scorp.metadata["variable"] == expected_partnership_variable
    assert partnership_scorp.measure == expected_partnership_variable

    medical_dental = by_arch_variable["medical_dental_expense_amount"]
    assert medical_dental.metadata["variable"] == "medical_expense_deduction"
    assert medical_dental.measure == "medical_expense_deduction"


def test_arch_consumer_fact_jsonl_provider_maps_eitc_by_agi_and_children(
tmp_path: Path,
) -> None:
Expand Down
Loading