Skip to content

Commit cc55ad4

Browse files
authored
Map state broad SOI Arch targets (#21)
1 parent 78672e5 commit cc55ad4

2 files changed

Lines changed: 124 additions & 0 deletions

File tree

src/microplex_us/targets/arch.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
"investment_interest_paid_amount": "investment_interest_expense",
124124
"interest_paid_deduction_amount": "interest_deduction",
125125
"medical_amount": "medical_expense_deduction",
126+
"medical_dental_expense_amount": "medical_expense_deduction",
126127
"real_estate_taxes_amount": "real_estate_taxes",
127128
"aca_aptc_amount": "aca_ptc",
128129
"medicaid_benefits": "medicaid",
@@ -324,12 +325,35 @@
324325
"COUNT",
325326
),
326327
"irs_soi.ordinary_dividends": ("ordinary_dividends_amount", "AMOUNT"),
328+
"irs_soi.returns_with_qualified_dividends": (
329+
"qualified_dividends_returns",
330+
"COUNT",
331+
),
332+
"irs_soi.qualified_dividends": ("qualified_dividends_amount", "AMOUNT"),
327333
"irs_soi.returns_with_taxable_interest": (
328334
"taxable_interest_returns",
329335
"COUNT",
330336
),
331337
"irs_soi.taxable_interest": ("taxable_interest_amount", "AMOUNT"),
338+
"irs_soi.returns_with_tax_exempt_interest": (
339+
"tax_exempt_interest_returns",
340+
"COUNT",
341+
),
342+
"irs_soi.tax_exempt_interest": ("tax_exempt_interest_amount", "AMOUNT"),
343+
"irs_soi.returns_with_schedule_c_income": (
344+
"schedule_c_income_returns",
345+
"COUNT",
346+
),
347+
"irs_soi.schedule_c_income": ("schedule_c_income_amount", "AMOUNT"),
332348
"irs_soi.taxable_net_capital_gains": ("net_capital_gains_amount", "AMOUNT"),
349+
"irs_soi.returns_with_partnership_scorp_income": (
350+
"partnership_scorp_income_returns",
351+
"COUNT",
352+
),
353+
"irs_soi.partnership_scorp_income": (
354+
"partnership_scorp_income_amount",
355+
"AMOUNT",
356+
),
333357
"irs_soi.taxable_ira_distributions": (
334358
"taxable_ira_distributions_amount",
335359
"AMOUNT",
@@ -348,6 +372,14 @@
348372
"itemized_deductions_returns",
349373
"COUNT",
350374
),
375+
"irs_soi.returns_with_medical_dental_expense_deduction": (
376+
"medical_claims",
377+
"COUNT",
378+
),
379+
"irs_soi.medical_dental_expense_deduction": (
380+
"medical_dental_expense_amount",
381+
"AMOUNT",
382+
),
351383
"irs_soi.standard_deduction": ("standard_deduction", "AMOUNT"),
352384
"irs_soi.taxable_income": ("taxable_income", "AMOUNT"),
353385
"irs_soi.total_income": ("total_income", "AMOUNT"),
@@ -761,11 +793,16 @@
761793
),
762794
"taxable_social_security_amount": "Taxable Social Security benefits amount",
763795
"ordinary_dividends_amount": "Ordinary dividends amount",
796+
"qualified_dividends_returns": "Returns with qualified dividends",
764797
"qualified_dividends_amount": "Qualified dividends amount",
765798
"long_term_capital_gains_amount": "Long-term capital gains amount",
766799
"short_term_capital_gains_amount": "Short-term capital gains amount",
800+
"partnership_scorp_income_returns": "Returns with partnership and S-corp income",
767801
"partnership_scorp_income_amount": "Partnership and S-corp income amount",
802+
"schedule_c_income_returns": "Returns with Schedule C income",
768803
"schedule_c_income_amount": "Schedule C income amount",
804+
"medical_claims": "Returns with medical expense deduction",
805+
"medical_dental_expense_amount": "Medical and dental expense amount",
769806
"tax_unit_count": "Tax unit count",
770807
"household_count": "Household count",
771808
"population": "Population count",

tests/targets/test_arch_facts.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,93 @@ def test_arch_consumer_fact_jsonl_provider_maps_state_soi_rows(
12811281
}
12821282

12831283

1284+
def test_arch_consumer_fact_jsonl_provider_maps_state_broad_soi_concepts(
    tmp_path: Path,
) -> None:
    """Check the targets produced for state-level broad SOI consumer facts.

    Four California tax-year-2022 facts (qualified dividends, Schedule C
    return count, partnership/S-corp income, medical and dental expense
    deduction) are written to a JSONL file, loaded through
    ``ArchConsumerFactJSONLTargetProvider`` with a 2022 query, and the
    resulting targets are looked up by their ``arch_variable`` metadata.
    """
    california = {"level": "state", "id": "0400000US06", "name": "California"}

    def _state_fact(fact_id, concept, **extra):
        # All rows share domain, source, period and geography; only the
        # id, concept, value and (optional) unit differ between them.
        return _consumer_fact(
            fact_id,
            concept=concept,
            domain="all_individual_income_tax_returns",
            source_name="irs_soi",
            source_table="Historic Table 2 state broad totals",
            period={"type": "tax_year", "value": 2022},
            geography=california,
            **extra,
        )

    facts = [
        _state_fact(
            "state-ca-qualified-dividends",
            "irs_soi.qualified_dividends",
            value=93_000_000_000,
            unit="usd",
        ),
        # The return-count fact is given without a unit.
        _state_fact(
            "state-ca-schedule-c-returns",
            "irs_soi.returns_with_schedule_c_income",
            value=3_617_080,
        ),
        _state_fact(
            "state-ca-partnership-scorp",
            "irs_soi.partnership_scorp_income",
            value=125_930_370_000,
            unit="usd",
        ),
        _state_fact(
            "state-ca-medical-dental",
            "irs_soi.medical_dental_expense_deduction",
            value=11_456_144_000,
            unit="usd",
        ),
    ]

    # One JSON object per line, newline-terminated.
    facts_path = tmp_path / "consumer_facts.jsonl"
    serialized = [json.dumps(fact, sort_keys=True) for fact in facts]
    facts_path.write_text("\n".join(serialized) + "\n")

    provider = ArchConsumerFactJSONLTargetProvider(facts_path)
    loaded = provider.load_target_set(TargetQuery(period=2022))
    by_arch_variable = {}
    for target in loaded.targets:
        by_arch_variable[target.metadata["arch_variable"]] = target

    # Qualified dividends amount: mapped variable, measure and filters.
    dividends_target = by_arch_variable["qualified_dividends_amount"]
    assert dividends_target.metadata["variable"] == "qualified_dividend_income"
    assert dividends_target.measure == "qualified_dividend_income"
    expected_filters = {
        ("tax_unit_is_filer", "==", "1"),
        ("state_fips", "==", "06"),
    }
    assert _target_filter_tuples(dividends_target) == expected_filters

    # Schedule C return count: a count target with a positive-income filter.
    schedule_c_target = by_arch_variable["schedule_c_income_returns"]
    assert schedule_c_target.metadata["variable"] == "self_employment_income"
    assert schedule_c_target.aggregation.value == "count"
    schedule_c_filters = _target_filter_tuples(schedule_c_target)
    assert ("self_employment_income", ">", "0") in schedule_c_filters

    # Partnership and S-corp income amount.
    partnership_target = by_arch_variable["partnership_scorp_income_amount"]
    assert (
        partnership_target.metadata["variable"]
        == "tax_unit_partnership_s_corp_income"
    )
    assert partnership_target.measure == "tax_unit_partnership_s_corp_income"

    # Medical and dental expense deduction amount.
    medical_target = by_arch_variable["medical_dental_expense_amount"]
    assert medical_target.metadata["variable"] == "medical_expense_deduction"
    assert medical_target.measure == "medical_expense_deduction"
1369+
1370+
12841371
def test_arch_consumer_fact_jsonl_provider_maps_eitc_by_agi_and_children(
12851372
tmp_path: Path,
12861373
) -> None:

0 commit comments

Comments
 (0)