Skip to content

Commit 4a90460

Browse files
committed
Map ACS local Arch facts into Microplex targets
1 parent 2b1a613 commit 4a90460

2 files changed

Lines changed: 170 additions & 1 deletion

File tree

src/microplex_us/targets/arch.py

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,8 @@
573573
"medicare_part_b_premiums",
574574
"AMOUNT",
575575
),
576+
"census_acs.household_count": ("household_count", "COUNT"),
577+
"census_acs.person_count": ("population", "COUNT"),
576578
"census_decennial.resident_population": ("population", "COUNT"),
577579
"census_decennial.occupied_housing_units": ("household_count", "COUNT"),
578580
"census_pep.resident_population": ("population", "COUNT"),
@@ -781,10 +783,12 @@
781783
"tanf_cash_assistance": (),
782784
"tanf_caseload": (),
783785
"liheap_state_programs": (),
786+
"total_population": (),
784787
}
785788

786789
ARCH_FACT_CONSTRAINT_VARIABLE_ALIASES = {
787790
"age": "age",
791+
"snap_receipt_status": "snap_receipt_status",
788792
"us.tax.earned_income_credit_qualifying_children": "eitc_child_count",
789793
"us_social_security_and_ssi.program_payment_type": "program_payment_type",
790794
"us:statutes/26/62#adjusted_gross_income": "adjusted_gross_income",
@@ -4870,6 +4874,11 @@ def _arch_constraint_display_label(
48704874
return f"state senate district {value_text}"
48714875
if variable == "sldl_id" and canonical_operator == "==":
48724876
return f"state house district {value_text}"
4877+
if variable == "snap_receipt_status" and canonical_operator == "==":
4878+
if value_text == "receiving_food_stamps_snap":
4879+
return "SNAP > 0"
4880+
if value_text == "not_receiving_food_stamps_snap":
4881+
return "SNAP = 0"
48734882
positive_feature = ARCH_POSITIVE_CONSTRAINT_ALIASES.get(variable)
48744883
if positive_feature is not None and canonical_operator == "==":
48754884
feature_label = _humanize_snake_label(positive_feature)
@@ -4936,6 +4945,13 @@ def _canonical_filters_for_arch_constraints(
49364945
)
49374946
)
49384947
continue
4948+
if variable == "snap_receipt_status":
4949+
if value == "receiving_food_stamps_snap" and canonical_operator == "==":
4950+
filters.append(TargetFilter(feature="snap", operator=">", value=0))
4951+
continue
4952+
if value == "not_receiving_food_stamps_snap" and canonical_operator == "==":
4953+
filters.append(TargetFilter(feature="snap", operator="==", value=0))
4954+
continue
49394955
positive_feature = ARCH_POSITIVE_CONSTRAINT_ALIASES.get(variable)
49404956
if positive_feature is not None:
49414957
filters.append(
@@ -5017,7 +5033,7 @@ def _county_fips_from_arch_geography_id(geography_id: Any) -> str:
50175033

50185034
def _congressional_district_from_arch_geography_id(geography_id: Any) -> str:
50195035
raw = str(geography_id)
5020-
if raw.startswith("5001800US"):
5036+
if raw.startswith(("5001800US", "5001900US")):
50215037
return raw[-4:]
50225038
return raw
50235039

@@ -5662,6 +5678,12 @@ def _arch_gap_expected_source(cell: dict[str, Any]) -> str | None:
56625678
return "BEA"
56635679
if variable == "tax_unit_count" and "aca_ptc" in domain_variables:
56645680
return "IRS_SOI"
5681+
if (
5682+
variable in {"household_count", "person_count"}
5683+
and "snap" in domain_variables
5684+
and _normalize_geo_level(cell.get("geo_level")) == "district"
5685+
):
5686+
return "CENSUS_ACS"
56655687
if variable == "snap" or "snap" in domain_variables:
56665688
return "USDA_SNAP"
56675689
if variable == "tanf" or "tanf" in domain_variables:
@@ -5683,6 +5705,11 @@ def _arch_gap_expected_source(cell: dict[str, Any]) -> str | None:
56835705
if variable == "person_count":
56845706
if _normalize_geo_level(cell.get("geo_level")) in {"sldu", "sldl"}:
56855707
return "CENSUS_DECENNIAL"
5708+
if (
5709+
_normalize_geo_level(cell.get("geo_level")) == "district"
5710+
and "age" in domain_variables
5711+
):
5712+
return "CENSUS_ACS"
56865713
if "adjusted_gross_income" in domain_variables:
56875714
return "IRS_SOI"
56885715
if "age" in domain_variables or not domain_variables:

tests/targets/test_arch_facts.py

Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1492,6 +1492,148 @@ def test_arch_consumer_fact_jsonl_provider_maps_state_soi_rows(
14921492
}
14931493

14941494

1495+
def test_arch_consumer_fact_jsonl_provider_maps_acs_district_age_rows(
1496+
tmp_path: Path,
1497+
) -> None:
1498+
consumer_jsonl = tmp_path / "consumer_facts.jsonl"
1499+
rows = [
1500+
_consumer_fact(
1501+
"acs-cd-al01-age-0-4",
1502+
concept="census_acs.person_count",
1503+
domain="total_population",
1504+
source_name="census_acs",
1505+
source_table="ACS S0101 congressional district age",
1506+
period={"type": "calendar_year", "value": 2024},
1507+
geography={
1508+
"level": "congressional_district",
1509+
"id": "5001900US0101",
1510+
"name": "Congressional District 1 (119th Congress), Alabama",
1511+
},
1512+
value=39_908,
1513+
constraints=(
1514+
{
1515+
"variable": "age",
1516+
"operator": ">=",
1517+
"value": 0,
1518+
"unit": "years",
1519+
"role": "filter",
1520+
},
1521+
{
1522+
"variable": "age",
1523+
"operator": "<",
1524+
"value": 5,
1525+
"unit": "years",
1526+
"role": "filter",
1527+
},
1528+
),
1529+
),
1530+
]
1531+
consumer_jsonl.write_text(
1532+
"\n".join(json.dumps(row, sort_keys=True) for row in rows) + "\n"
1533+
)
1534+
1535+
target_set = ArchConsumerFactJSONLTargetProvider(consumer_jsonl).load_target_set(
1536+
TargetQuery(
1537+
period=2024,
1538+
provider_filters={
1539+
"sources": ["CENSUS_ACS"],
1540+
"target_cells": [
1541+
{
1542+
"variable": "person_count",
1543+
"geo_level": "district",
1544+
"geographic_id": "0101",
1545+
"domain_variable": "age",
1546+
},
1547+
],
1548+
},
1549+
)
1550+
)
1551+
1552+
assert len(target_set.targets) == 1
1553+
target = target_set.targets[0]
1554+
assert target.value == 39_908
1555+
assert target.metadata["variable"] == "person_count"
1556+
assert target.metadata["geo_level"] == "district"
1557+
assert target.metadata["source"] == "CENSUS_ACS"
1558+
assert _target_filter_tuples(target) == {
1559+
("age", ">=", "0"),
1560+
("age", "<", "5"),
1561+
("congressional_district_geoid", "==", "0101"),
1562+
}
1563+
1564+
1565+
def test_arch_consumer_fact_jsonl_provider_maps_acs_district_snap_rows(
1566+
tmp_path: Path,
1567+
) -> None:
1568+
consumer_jsonl = tmp_path / "consumer_facts.jsonl"
1569+
geography = {
1570+
"level": "congressional_district",
1571+
"id": "5001900US0101",
1572+
"name": "Congressional District 1 (119th Congress), Alabama",
1573+
}
1574+
rows = [
1575+
_consumer_fact(
1576+
"acs-cd-al01-households-total",
1577+
concept="census_acs.household_count",
1578+
domain="households",
1579+
source_name="census_acs",
1580+
source_table="ACS S2201 congressional district SNAP households",
1581+
period={"type": "calendar_year", "value": 2024},
1582+
geography=geography,
1583+
value=300_636,
1584+
),
1585+
_consumer_fact(
1586+
"acs-cd-al01-households-snap",
1587+
concept="census_acs.household_count",
1588+
domain="households",
1589+
source_name="census_acs",
1590+
source_table="ACS S2201 congressional district SNAP households",
1591+
period={"type": "calendar_year", "value": 2024},
1592+
geography=geography,
1593+
value=34_742,
1594+
constraints=(
1595+
{
1596+
"variable": "snap_receipt_status",
1597+
"operator": "==",
1598+
"value": "receiving_food_stamps_snap",
1599+
"role": "filter",
1600+
},
1601+
),
1602+
),
1603+
]
1604+
consumer_jsonl.write_text(
1605+
"\n".join(json.dumps(row, sort_keys=True) for row in rows) + "\n"
1606+
)
1607+
1608+
target_set = ArchConsumerFactJSONLTargetProvider(consumer_jsonl).load_target_set(
1609+
TargetQuery(
1610+
period=2024,
1611+
provider_filters={
1612+
"sources": ["CENSUS_ACS"],
1613+
"target_cells": [
1614+
{
1615+
"variable": "household_count",
1616+
"geo_level": "district",
1617+
"geographic_id": "0101",
1618+
"domain_variable": "snap",
1619+
},
1620+
],
1621+
},
1622+
)
1623+
)
1624+
1625+
assert len(target_set.targets) == 1
1626+
target = target_set.targets[0]
1627+
assert target.value == 34_742
1628+
assert target.metadata["variable"] == "household_count"
1629+
assert target.metadata["geo_level"] == "district"
1630+
assert target.metadata["source"] == "CENSUS_ACS"
1631+
assert _target_filter_tuples(target) == {
1632+
("congressional_district_geoid", "==", "0101"),
1633+
("snap", ">", "0"),
1634+
}
1635+
1636+
14951637
def test_arch_consumer_fact_jsonl_provider_maps_state_broad_soi_concepts(
14961638
tmp_path: Path,
14971639
) -> None:

0 commit comments

Comments (0)