Skip to content

Commit 401173b

Browse files
committed
Map SOI itemized deduction facts
1 parent 1ae870a commit 401173b

3 files changed

Lines changed: 368 additions & 10 deletions

File tree

src/microplex_us/targets/arch.py

Lines changed: 183 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@
115115
"qbi_amount": "qualified_business_income_deduction",
116116
"salt_amount": "salt",
117117
"limited_state_local_taxes_amount": "salt_deduction",
118+
"state_local_income_or_sales_tax_amount": (
119+
"state_and_local_sales_or_income_tax"
120+
),
118121
"charitable_amount": "charitable_deduction",
119122
"mortgage_interest_amount": "deductible_mortgage_interest",
120123
"mortgage_interest_paid_amount": "deductible_mortgage_interest",
@@ -147,18 +150,34 @@
147150

148151
# Arch variable names grouped as IRS SOI itemized-deduction amount variables.
# NOTE(review): how this set is consumed is not visible in this excerpt;
# confirm against the call sites before relying on membership semantics.
ARCH_IRS_SOI_ITEMIZED_DEDUCTION_AMOUNT_VARIABLES = frozenset(
    (
        "charitable_amount",
        "deductible_points_amount",
        "home_mortgage_personal_seller_amount",
        "interest_paid_deduction_amount",
        "investment_interest_paid_amount",
        "limited_state_local_taxes_amount",
        "medical_amount",
        "medical_dental_expense_amount",
        "mortgage_interest_paid_amount",
        "real_estate_taxes_amount",
        "salt_amount",
        "state_local_income_or_sales_tax_amount",
    )
)
156167

157168
# Arch variable names grouped as IRS SOI itemized-deduction count variables
# (the "*_returns" / "*_claims" counterparts of the amount variables).
# NOTE(review): how this set is consumed is not visible in this excerpt;
# confirm against the call sites before relying on membership semantics.
ARCH_IRS_SOI_ITEMIZED_DEDUCTION_COUNT_VARIABLES = frozenset(
    (
        "charitable_returns",
        "deductible_points_returns",
        "home_mortgage_personal_seller_returns",
        "interest_paid_deduction_returns",
        "investment_interest_paid_returns",
        "limited_state_local_taxes_returns",
        "medical_claims",
        "mortgage_interest_paid_returns",
        "real_estate_taxes_claims",
        "salt_claims",
        "state_local_income_or_sales_tax_returns",
    )
)
164183

@@ -320,6 +339,56 @@
320339
"limited_state_local_taxes_amount",
321340
"AMOUNT",
322341
),
342+
"irs_soi.returns_with_state_local_income_or_sales_taxes": (
343+
"state_local_income_or_sales_tax_returns",
344+
"COUNT",
345+
),
346+
"irs_soi.state_local_income_or_sales_taxes": (
347+
"state_local_income_or_sales_tax_amount",
348+
"AMOUNT",
349+
),
350+
"irs_soi.returns_with_interest_paid_deduction": (
351+
"interest_paid_deduction_returns",
352+
"COUNT",
353+
),
354+
"irs_soi.interest_paid_deduction": (
355+
"interest_paid_deduction_amount",
356+
"AMOUNT",
357+
),
358+
"irs_soi.returns_with_home_mortgage_interest_paid_to_financial_institutions": (
359+
"mortgage_interest_paid_returns",
360+
"COUNT",
361+
),
362+
"irs_soi.home_mortgage_interest_paid_to_financial_institutions": (
363+
"mortgage_interest_paid_amount",
364+
"AMOUNT",
365+
),
366+
"irs_soi.returns_with_home_mortgage_interest_paid_to_individuals": (
367+
"home_mortgage_personal_seller_returns",
368+
"COUNT",
369+
),
370+
"irs_soi.home_mortgage_interest_paid_to_individuals": (
371+
"home_mortgage_personal_seller_amount",
372+
"AMOUNT",
373+
),
374+
"irs_soi.returns_with_deductible_points": (
375+
"deductible_points_returns",
376+
"COUNT",
377+
),
378+
"irs_soi.deductible_points": ("deductible_points_amount", "AMOUNT"),
379+
"irs_soi.returns_with_investment_interest_expense_deduction": (
380+
"investment_interest_paid_returns",
381+
"COUNT",
382+
),
383+
"irs_soi.investment_interest_expense_deduction": (
384+
"investment_interest_paid_amount",
385+
"AMOUNT",
386+
),
387+
"irs_soi.returns_with_contributions_deduction": (
388+
"charitable_returns",
389+
"COUNT",
390+
),
391+
"irs_soi.contributions_deduction": ("charitable_amount", "AMOUNT"),
323392
"us:statutes/26/62#adjusted_gross_income": (
324393
"adjusted_gross_income",
325394
"AMOUNT",
@@ -632,6 +701,17 @@
632701
),
633702
}
634703

704+
# Fact concepts that are deliberately not turned into Microplex targets.
#
# The SOI Table 2.1 total for state/local taxes includes personal property
# taxes, whereas PolicyEngine's federal SALT input currently combines only
# state/local income-or-sales taxes and real estate taxes. The source-native
# total therefore has no matching target and is not exposed.
ARCH_FACT_CONCEPTS_TO_SKIP = frozenset(
    (
        "irs_soi.returns_with_state_and_local_taxes",
        "irs_soi.state_and_local_taxes",
    )
)
714+
635715
ARCH_FACT_DOMAIN_CONSTRAINTS = {
636716
"all_individual_income_tax_returns": (("is_tax_filer", "==", "1"),),
637717
"form_w2_items": (),
@@ -833,7 +913,26 @@
833913
"state_individual_income_tax_collections": (
834914
"State individual income tax collections"
835915
),
916+
"charitable_amount": "Contributions deduction amount",
917+
"charitable_returns": "Returns with contributions deduction",
918+
"deductible_points_returns": "Returns with deductible points",
919+
"home_mortgage_personal_seller_returns": (
920+
"Returns with home mortgage interest paid to individuals"
921+
),
922+
"interest_paid_deduction_returns": ("Returns with interest paid deduction"),
923+
"investment_interest_paid_returns": (
924+
"Returns with investment interest expense deduction"
925+
),
836926
"limited_state_local_taxes_amount": "Limited state and local taxes amount",
927+
"state_local_income_or_sales_tax_amount": (
928+
"State and local income or sales taxes amount"
929+
),
930+
"state_local_income_or_sales_tax_returns": (
931+
"Returns with state and local income or sales taxes"
932+
),
933+
"mortgage_interest_paid_returns": (
934+
"Returns with home mortgage interest paid to financial institutions"
935+
),
837936
"interest_paid_deduction_amount": "Interest paid deduction amount",
838937
"mortgage_interest_paid_amount": "Mortgage interest paid amount",
839938
"home_mortgage_personal_seller_amount": (
@@ -990,8 +1089,22 @@
9901089
"income_tax_before_credits": "IRS SOI Publication 1304 Table 1.1",
9911090
"income_tax_before_credits_returns": "IRS SOI Historic Table 2",
9921091
"tax_filer_individual_count": "IRS SOI Historic Table 2",
993-
"interest_paid_deduction_amount": "IRS SOI Historic Table 2",
994-
"limited_state_local_taxes_amount": "IRS SOI Historic Table 2",
1092+
"charitable_amount": "IRS SOI Publication 1304 Table 2.1",
1093+
"charitable_returns": "IRS SOI Publication 1304 Table 2.1",
1094+
"deductible_points_amount": "IRS SOI Publication 1304 Table 2.1",
1095+
"deductible_points_returns": "IRS SOI Publication 1304 Table 2.1",
1096+
"home_mortgage_personal_seller_amount": "IRS SOI Publication 1304 Table 2.1",
1097+
"home_mortgage_personal_seller_returns": "IRS SOI Publication 1304 Table 2.1",
1098+
"interest_paid_deduction_amount": "IRS SOI Publication 1304 Table 2.1",
1099+
"interest_paid_deduction_returns": "IRS SOI Publication 1304 Table 2.1",
1100+
"investment_interest_paid_amount": "IRS SOI Publication 1304 Table 2.1",
1101+
"investment_interest_paid_returns": "IRS SOI Publication 1304 Table 2.1",
1102+
"limited_state_local_taxes_amount": "IRS SOI Publication 1304 Table 2.1",
1103+
"limited_state_local_taxes_returns": "IRS SOI Publication 1304 Table 2.1",
1104+
"mortgage_interest_paid_amount": "IRS SOI Publication 1304 Table 2.1",
1105+
"mortgage_interest_paid_returns": "IRS SOI Publication 1304 Table 2.1",
1106+
"state_local_income_or_sales_tax_amount": "IRS SOI Publication 1304 Table 2.1",
1107+
"state_local_income_or_sales_tax_returns": "IRS SOI Publication 1304 Table 2.1",
9951108
"liheap_household_count": "HHS ACF LIHEAP National Profile",
9961109
"medicaid_benefits": (
9971110
"CMS National Health Expenditures by type of service and source of funds"
@@ -4154,7 +4267,10 @@ def _consumer_fact_rows_to_records(
41544267
)
41554268
)
41564269
stratum_id = stratum_ids.setdefault(constraints, len(stratum_ids) + 1)
4157-
variable, target_type = _arch_consumer_fact_target_identity(row)
4270+
target_identity = _arch_consumer_fact_target_identity(row)
4271+
if target_identity is None:
4272+
continue
4273+
variable, target_type = target_identity
41584274
source = row.get("source") or {}
41594275
observed_measure = row.get("observed_measure") or {}
41604276
geography = row.get("geography") or {}
@@ -4204,8 +4320,12 @@ def _consumer_fact_period(row: dict[str, Any]) -> int:
42044320
return arch_consumer_fact_period(row)
42054321

42064322

4207-
def _arch_consumer_fact_target_identity(row: dict[str, Any]) -> tuple[str, str]:
4323+
def _arch_consumer_fact_target_identity(
4324+
row: dict[str, Any],
4325+
) -> tuple[str, str] | None:
42084326
concept = _arch_consumer_fact_concept(row)
4327+
if concept in ARCH_FACT_CONCEPTS_TO_SKIP:
4328+
return None
42094329
if concept == "ssa.annual_oasdi_or_ssi_payment_amount":
42104330
return (_ssa_payment_variable_from_consumer_fact(row), "AMOUNT")
42114331
try:
@@ -4316,7 +4436,10 @@ def _group_arch_fact_rows(
43164436
row = item["row"]
43174437
constraints = tuple(dict.fromkeys(item["constraints"]))
43184438
stratum_id = stratum_ids.setdefault(constraints, len(stratum_ids) + 1)
4319-
variable, target_type = _arch_fact_target_identity(row)
4439+
target_identity = _arch_fact_target_identity(row)
4440+
if target_identity is None:
4441+
continue
4442+
variable, target_type = target_identity
43204443
period = int(row["period_value"])
43214444
source_name = row["source_name"] or "arch"
43224445
fact_lineage = lineage.get(fact_key, {})
@@ -4355,8 +4478,10 @@ def _group_arch_fact_rows(
43554478
return records
43564479

43574480

4358-
def _arch_fact_target_identity(row: sqlite3.Row) -> tuple[str, str]:
4481+
def _arch_fact_target_identity(row: sqlite3.Row) -> tuple[str, str] | None:
43594482
concept = str(row["measure_concept"])
4483+
if concept in ARCH_FACT_CONCEPTS_TO_SKIP:
4484+
return None
43604485
try:
43614486
return ARCH_FACT_CONCEPT_TO_TARGET[concept]
43624487
except KeyError as exc:
@@ -5034,7 +5159,10 @@ def _matches_arch_target_cell(
50345159
_split_target_cell_domain_variables(domain_variable)
50355160
)
50365161
if domain_variable is None or not cell_domain_variables:
5037-
if _target_self_domain_is_redundant(target, target_domain_variables):
5162+
if _target_domain_variables_are_redundant_for_unfiltered_cell(
5163+
target,
5164+
target_domain_variables,
5165+
):
50385166
return True
50395167
return not target_domain_variables
50405168
if not _target_domain_variables_match(
@@ -5053,10 +5181,21 @@ def _target_domain_variables_match(
50535181
target_domain_variables: set[str],
50545182
cell_domain_variables: set[str],
50555183
) -> bool:
5184+
target_domain_variables = _normalize_arch_target_domain_variables(
5185+
target,
5186+
target_domain_variables,
5187+
)
5188+
cell_domain_variables = _normalize_arch_target_domain_variables(
5189+
target,
5190+
cell_domain_variables,
5191+
)
50565192
if cell_domain_variables == target_domain_variables:
50575193
return True
50585194

5059-
implied_domain_variables = _arch_target_implied_domain_variables(target)
5195+
implied_domain_variables = _normalize_arch_target_domain_variables(
5196+
target,
5197+
_arch_target_implied_domain_variables(target),
5198+
)
50605199
effective_target_domain_variables = (
50615200
target_domain_variables | implied_domain_variables
50625201
)
@@ -5093,6 +5232,42 @@ def _target_domain_variables_match(
50935232
return False
50945233

50955234

5235+
def _normalize_arch_target_domain_variables(
    target: CanonicalTargetSpec,
    domain_variables: set[str],
) -> set[str]:
    """Map source-native domain names onto the equivalent PE target cell names.

    For targets whose implied domain is the SOI itemized-deduction domain,
    the source-native ``"itemized_deductions"`` variable is replaced with
    ``"tax_unit_itemizes"``. For every other target the variables are
    returned unchanged.

    Args:
        target: Target spec used to decide whether normalization applies.
        domain_variables: Domain variable names to normalize; not mutated.

    Returns:
        A new set holding the normalized domain variable names.
    """
    applies = _arch_target_has_soi_itemized_deduction_domain(target)
    # Always work on a copy so the caller's collection is never mutated.
    normalized = set(domain_variables)
    if applies and "itemized_deductions" in normalized:
        normalized.discard("itemized_deductions")
        normalized.add("tax_unit_itemizes")
    return normalized
5246+
5247+
5248+
def _target_domain_variables_are_redundant_for_unfiltered_cell(
    target: CanonicalTargetSpec,
    target_domain_variables: set[str],
) -> bool:
    """Report whether a target's domain adds nothing for an unfiltered cell.

    The domain is treated as redundant when, after normalization, either
    ``_target_self_domain_is_redundant`` accepts it, or the target is a SUM
    aggregation with the SOI itemized-deduction domain and its normalized
    domain contains nothing other than ``"tax_unit_itemizes"``.

    Args:
        target: Target spec being matched against a cell.
        target_domain_variables: Domain variable names declared on the target.

    Returns:
        ``True`` when the domain variables are redundant, otherwise ``False``.
    """
    domains = _normalize_arch_target_domain_variables(
        target,
        target_domain_variables,
    )
    if _target_self_domain_is_redundant(target, domains):
        return True
    # The itemizer shortcut below only applies to SUM aggregations.
    if target.aggregation is not TargetAggregation.SUM:
        return False
    if not _arch_target_has_soi_itemized_deduction_domain(target):
        return False
    # An empty domain also qualifies, since the empty set is a subset.
    return domains <= {"tax_unit_itemizes"}
5263+
5264+
5265+
def _arch_target_has_soi_itemized_deduction_domain(
    target: CanonicalTargetSpec,
) -> bool:
    """Return whether the target's implied domain is exactly the itemizer flag.

    Args:
        target: Target spec whose implied domain variables are inspected.

    Returns:
        ``True`` when ``_arch_target_implied_domain_variables(target)`` equals
        ``{"tax_unit_itemizes"}``, otherwise ``False``.
    """
    implied = _arch_target_implied_domain_variables(target)
    return implied == {"tax_unit_itemizes"}
5269+
5270+
50965271
def _arch_target_implied_domain_variables(
50975272
target: CanonicalTargetSpec,
50985273
) -> set[str]:

tests/targets/test_arch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3768,7 +3768,7 @@ def test_arch_target_gap_queue_points_itemized_deductions_to_soi_table_2(tmp_pat
37683768
rows_by_cell = {(row.variable, row.domain_variable): row for row in report.rows}
37693769
salt_row = rows_by_variable["salt_deduction"]
37703770
assert salt_row.expected_source == "IRS_SOI"
3771-
assert salt_row.expected_source_table == "IRS SOI Historic Table 2"
3771+
assert salt_row.expected_source_table == "IRS SOI Publication 1304 Table 2.1"
37723772
assert salt_row.expected_arch_variable == "limited_state_local_taxes_amount"
37733773
assert salt_row.expected_target_type == "AMOUNT"
37743774
assert salt_row.expected_entity == "tax_unit"
@@ -3777,7 +3777,7 @@ def test_arch_target_gap_queue_points_itemized_deductions_to_soi_table_2(tmp_pat
37773777

37783778
interest_row = rows_by_variable["interest_deduction"]
37793779
assert interest_row.expected_source == "IRS_SOI"
3780-
assert interest_row.expected_source_table == "IRS SOI Historic Table 2"
3780+
assert interest_row.expected_source_table == "IRS SOI Publication 1304 Table 2.1"
37813781
assert interest_row.expected_arch_variable == "interest_paid_deduction_amount"
37823782
assert interest_row.expected_target_type == "AMOUNT"
37833783
assert interest_row.expected_entity == "tax_unit"

0 commit comments

Comments
 (0)