@@ -936,6 +936,17 @@ def _consumer_fact(
936936 }
937937
938938
939+ def _target_filter_tuples (target : Any ) -> set [tuple [str , str , str ]]:
940+ return {
941+ (
942+ str (target_filter .feature ),
943+ str (getattr (target_filter .operator , "value" , target_filter .operator )),
944+ str (target_filter .value ),
945+ )
946+ for target_filter in target .filters
947+ }
948+
949+
939950def _normalize_target_behavior (target_set ) -> list [tuple [Any , ...]]:
940951 rows = []
941952 for target in target_set .targets :
@@ -1191,6 +1202,139 @@ def test_arch_consumer_fact_jsonl_provider_maps_historic_table_2_concepts(
11911202 assert tax_filer_individuals .aggregation .value == "count"
11921203
11931204
def test_arch_consumer_fact_jsonl_provider_maps_state_soi_rows(
    tmp_path: Path,
) -> None:
    """Check that state-level IRS SOI consumer facts load as state-filtered targets.

    Two facts for geography id ``0400000US06`` are written to a JSONL file:
    an AGI amount restricted to the 50k-75k bracket and an EITC amount with
    no constraints. The loaded targets are looked up by their
    ``arch_variable`` metadata and checked for the expected measure,
    aggregation, geography metadata and filter set, including a
    ``state_fips == 06`` filter on both.
    """
    consumer_jsonl = tmp_path / "consumer_facts.jsonl"
    rows = [
        _consumer_fact(
            "state-ca-agi-50k-75k",
            concept="irs_soi.adjusted_gross_income",
            domain="all_individual_income_tax_returns",
            source_name="irs_soi",
            source_table="Historic Table 2 state AGI facts",
            period={"type": "tax_year", "value": 2022},
            geography={"level": "state", "id": "0400000US06", "name": "California"},
            value=123_456_000_000,
            unit="usd",
            constraints=(
                {
                    "variable": "us:statutes/26/62#adjusted_gross_income",
                    "operator": ">=",
                    "value": 50_000,
                    "unit": "usd",
                    "role": "filter",
                },
                {
                    "variable": "us:statutes/26/62#adjusted_gross_income",
                    "operator": "<",
                    "value": 75_000,
                    "unit": "usd",
                    "role": "filter",
                },
            ),
        ),
        _consumer_fact(
            "state-ca-eitc-amount",
            concept="irs_soi.earned_income_credit",
            domain="individual_income_tax_returns",
            source_name="irs_soi",
            source_table="Historic Table 2 state EITC totals",
            period={"type": "tax_year", "value": 2022},
            geography={"level": "state", "id": "0400000US06", "name": "California"},
            value=5_770_703_000,
            unit="usd",
        ),
    ]
    # One JSON document per line, terminated by a bare newline: padding after
    # the newline would leave a whitespace-only fragment at the end of the
    # file, which is not a valid JSON Lines record.
    consumer_jsonl.write_text(
        "\n".join(json.dumps(row, sort_keys=True) for row in rows) + "\n",
        encoding="utf-8",
    )

    target_set = ArchConsumerFactJSONLTargetProvider(consumer_jsonl).load_target_set(
        TargetQuery(period=2022)
    )
    targets_by_arch_variable = {
        target.metadata["arch_variable"]: target for target in target_set.targets
    }

    agi = targets_by_arch_variable["adjusted_gross_income"]
    assert agi.metadata["variable"] == "adjusted_gross_income"
    assert agi.metadata["geo_level"] == "state"
    assert agi.metadata["geography_id"] == "0400000US06"
    assert agi.measure == "adjusted_gross_income"
    assert agi.aggregation.value == "sum"
    assert _target_filter_tuples(agi) == {
        ("tax_unit_is_filer", "==", "1"),
        ("adjusted_gross_income", ">=", "50000"),
        ("adjusted_gross_income", "<", "75000"),
        ("state_fips", "==", "06"),
    }

    eitc = targets_by_arch_variable["eitc_amount"]
    assert eitc.metadata["variable"] == "eitc"
    assert eitc.metadata["geo_level"] == "state"
    assert eitc.measure == "eitc"
    assert eitc.aggregation.value == "sum"
    assert _target_filter_tuples(eitc) == {
        ("tax_unit_is_filer", "==", "1"),
        ("state_fips", "==", "06"),
    }
1282+
1283+
def test_arch_consumer_fact_jsonl_provider_maps_eitc_by_agi_and_children(
    tmp_path: Path,
) -> None:
    """Check that an EITC return-count fact keeps its AGI and child-count filters.

    A single fact with three filter constraints (AGI >= 50k, AGI < 75k and
    three qualifying children) is written to a JSONL file. The first loaded
    target is expected to be a count of ``eitc`` with ``arch_variable``
    ``eitc_claims``, carrying an ``eitc > 0`` filter alongside the AGI bounds
    and an ``eitc_child_count == 3`` filter.
    """
    consumer_jsonl = tmp_path / "consumer_facts.jsonl"
    row = _consumer_fact(
        "eitc-three-child-50k-75k-returns",
        concept="irs_soi.returns_with_total_earned_income_credit",
        domain="individual_income_tax_returns_with_earned_income_credit",
        source_name="irs_soi",
        source_table="Publication 1304 Table 2.5 EITC by AGI and qualifying children",
        period={"type": "tax_year", "value": 2022},
        value=97_411,
        constraints=(
            {
                "variable": "us:statutes/26/62#adjusted_gross_income",
                "operator": ">=",
                "value": 50_000,
                "unit": "usd",
                "role": "filter",
            },
            {
                "variable": "us:statutes/26/62#adjusted_gross_income",
                "operator": "<",
                "value": 75_000,
                "unit": "usd",
                "role": "filter",
            },
            {
                "variable": "us.tax.earned_income_credit_qualifying_children",
                "operator": "==",
                "value": 3,
                "unit": "count",
                "role": "filter",
            },
        ),
    )
    # Terminate the single record with a bare newline; padding after it would
    # leave a whitespace-only fragment that is not a valid JSON Lines record.
    consumer_jsonl.write_text(
        json.dumps(row, sort_keys=True) + "\n",
        encoding="utf-8",
    )

    target_set = ArchConsumerFactJSONLTargetProvider(consumer_jsonl).load_target_set(
        TargetQuery(period=2022)
    )
    target = target_set.targets[0]

    assert target.metadata["arch_variable"] == "eitc_claims"
    assert target.metadata["variable"] == "eitc"
    assert target.aggregation.value == "count"
    assert _target_filter_tuples(target) == {
        ("eitc", ">", "0"),
        ("adjusted_gross_income", ">=", "50000"),
        ("adjusted_gross_income", "<", "75000"),
        ("eitc_child_count", "==", "3"),
    }
1336+
1337+
11941338def test_arch_consumer_fact_jsonl_provider_maps_us_admin_source_families (
11951339 tmp_path : Path ,
11961340) -> None :
0 commit comments