1- """Shared fixtures for local area calibration tests."""
1+ """Shared fixtures for local area calibration tests.
2+
3+ Importantly, this file determines which variables are included in the sparse matrix and the calibration routine.
4+ """
25
36import pytest
47import numpy as np
1619 get_calculated_variables ,
1720)
1821
# Variables checked in the state-level value-matching tests.
# Each entry is (variable_name, rtol):
#   variable_name -- name as per the targets in policy_data.db
#   rtol          -- relative tolerance used for the comparison
VARIABLES_TO_TEST = [
    ("snap", 1e-2),
    ("health_insurance_premiums_without_medicare_part_b", 1e-2),
    ("medicaid", 1e-2),
    ("medicare_part_b_premiums", 1e-2),
    ("other_medical_expenses", 1e-2),
    ("over_the_counter_health_expenses", 1e-2),
    ("salt_deduction", 1e-2),
    ("spm_unit_capped_work_childcare_expenses", 1e-2),
    ("spm_unit_capped_housing_subsidy", 1e-2),
    ("ssi", 1e-2),
    ("tanf", 1e-2),
    ("tip_income", 1e-2),
    ("unemployment_compensation", 1e-2),
]

# Combined filter config to build the matrix with all variables at once.
COMBINED_FILTER_CONFIG = {
    "stratum_group_ids": [
        4,  # SNAP targets
        5,  # Medicaid targets
        112,  # Unemployment compensation targets
    ],
    # Derived from VARIABLES_TO_TEST (same names, same order) so the list of
    # tested variables and the list of matrix variables cannot drift apart.
    "variables": [name for name, _rtol in VARIABLES_TO_TEST],
}

# Maximum allowed mismatch rate for state-level value comparison
MAX_MISMATCH_RATE = 0.02

# Number of samples for cell-level verification tests
N_VERIFICATION_SAMPLES = 200
71+
1972
2073@pytest .fixture (scope = "module" )
2174def db_uri ():
@@ -30,7 +83,7 @@ def dataset_path():
3083
3184@pytest .fixture (scope = "module" )
3285def test_cds (db_uri ):
33- """CDs from NC, HI, MT, AK (manageable size, multiple same-state CDs) ."""
86+ """CDs from multiple states for comprehensive testing ."""
3487 engine = create_engine (db_uri )
3588 query = """
3689 SELECT DISTINCT sc.value as cd_geoid
@@ -43,6 +96,10 @@ def test_cds(db_uri):
4396 OR sc.value LIKE '150_'
4497 OR sc.value LIKE '300_'
4598 OR sc.value = '200' OR sc.value = '201'
99+ OR sc.value IN ('101', '102')
100+ OR sc.value IN ('601', '602')
101+ OR sc.value IN ('3601', '3602')
102+ OR sc.value IN ('4801', '4802')
46103 )
47104 ORDER BY sc.value
48105 """
@@ -58,15 +115,15 @@ def sim(dataset_path):
58115
@pytest.fixture(scope="module")
def matrix_data(db_uri, dataset_path, test_cds, sim):
    """Build the sparse matrix for all configured variables.

    A SparseMatrixBuilder is created for time period 2023 over the
    districts supplied by the ``test_cds`` fixture, and its matrix is built
    with ``COMBINED_FILTER_CONFIG`` as the target filter.

    Returns:
        The tuple ``(targets_df, X_sparse, household_id_mapping)`` unpacked
        from ``build_matrix``.
    """
    builder_options = {
        "time_period": 2023,
        "cds_to_calibrate": test_cds,
        "dataset_path": dataset_path,
    }
    matrix_builder = SparseMatrixBuilder(db_uri, **builder_options)

    # Unpack explicitly so a result of the wrong length fails here.
    targets, sparse_matrix, hh_id_map = matrix_builder.build_matrix(
        sim,
        target_filter=COMBINED_FILTER_CONFIG,
    )
    return targets, sparse_matrix, hh_id_map
72129
0 commit comments