@@ -62,14 +62,49 @@ class _StubDataset:
6262 regression test.
6363 """
6464
def __init__(self, weights: np.ndarray, **household_columns):
    """Build the stub's ``household`` frame.

    Args:
        weights: Per-household weights; stored as floats in the
            ``household_weight`` column.
        **household_columns: Extra columns to attach to the household
            frame, keyed by column name.
    """
    float_weights = weights.astype(float)
    household = pd.DataFrame({"household_weight": float_weights})
    # Attach the extra columns one at a time, in keyword order.
    for name in household_columns:
        household[name] = household_columns[name]
    self.household = household
6769
def copy(self) -> "_StubDataset":
    """Return a new ``_StubDataset`` built from this one's household frame.

    Every column other than ``household_weight`` is passed on as a copied
    NumPy array, so the new stub does not share those buffers with this one.
    """
    frame = self.household
    extras = {}
    for name in frame.columns:
        if name == "household_weight":
            continue
        extras[name] = frame[name].to_numpy(copy=True)
    household_weights = frame["household_weight"].to_numpy()
    return _StubDataset(household_weights, **extras)
7181
7282
def test_initialize_weight_priors_gives_zero_weight_rows_balanced_mass():
    """Zero-weight rows receive half of the total prior mass, split evenly.

    The checks are: all priors are positive, the total mass equals the
    input total, the positive-weight rows and the zero-weight rows each
    hold half of it, the two zero-weight rows get equal priors, and the
    ratio between the two positive-weight rows is unchanged.
    """
    from policyengine_uk_data.utils.calibrate import initialize_weight_priors

    weights = np.array([1_500.0, 0.0, 625.0, 0.0], dtype=np.float64)
    positive_rows = [0, 2]
    zero_rows = [1, 3]

    priors = initialize_weight_priors(weights)

    assert np.all(priors > 0)
    assert priors.sum() == pytest.approx(weights.sum())
    for rows in (positive_rows, zero_rows):
        assert priors[rows].sum() == pytest.approx(weights.sum() / 2)
    assert priors[1] == pytest.approx(priors[3])
    assert priors[0] / priors[2] == pytest.approx(weights[0] / weights[2])
96+
97+
def test_initialize_weight_priors_preserves_positive_weights_exactly():
    """With no zero weights, the priors equal the input weights exactly."""
    from policyengine_uk_data.utils.calibrate import initialize_weight_priors

    all_positive = np.array([1_500.0, 400.0, 625.0], dtype=np.float64)

    result = initialize_weight_priors(all_positive)

    # Exact equality on purpose: no tolerance is allowed here.
    np.testing.assert_array_equal(result, all_positive)
106+
107+
73108def test_calibrate_local_areas_saves_weights_in_nonverbose_branch (
74109 tmp_path , monkeypatch
75110):
@@ -159,3 +194,67 @@ def sparse_matrix_fn(dataset):
159194 with h5py .File (tmp_path / weight_file , "r" ) as f :
160195 weights = f ["2025" ][:]
161196 assert np .isfinite (weights ).all ()
197+
198+
def test_calibrate_local_areas_logs_loss_targets_and_source_diagnostics(
    tmp_path, monkeypatch
):
    """Run one calibration epoch on toy inputs and check the diagnostics CSV.

    The stub dataset has four households, two of which start at zero
    weight and are flagged ``household_is_spi_synthetic``. After
    calibrating, the saved weights for those rows must be positive and the
    first CSV row must carry the target name, finite losses, and the
    zero-weight / synthetic-source diagnostics.
    """
    import h5py

    from policyengine_uk_data.utils import calibrate as calibrate_module
    from policyengine_uk_data.utils.calibrate import calibrate_local_areas

    # Point the module's storage folder at the temporary directory; the
    # weight file is read back from there below.
    monkeypatch.setattr(calibrate_module, "STORAGE_FOLDER", tmp_path)

    matrix_fn, national_matrix_fn = _make_toy_inputs(n_households=4, area_count=2)
    starting_weights = np.array([4.0, 0.0, 4.0, 0.0])
    synthetic_flags = [False, True, False, True]
    dataset = _StubDataset(
        starting_weights,
        household_is_spi_synthetic=synthetic_flags,
    )

    def get_performance(weights, _m_c, _y_c, m_n, y_n, _excluded_targets):
        # Single-row performance table built from the first national target.
        national_estimates = weights.sum(axis=0) @ m_n
        error = float(national_estimates.iloc[0] - y_n.iloc[0])
        record = {
            "name": ["UK"],
            "metric": ["national_total"],
            "estimate": [float(national_estimates.iloc[0])],
            "target": [float(y_n.iloc[0])],
            "error": [error],
            "abs_error": [abs(error)],
            "rel_abs_error": [abs(error) / float(y_n.iloc[0])],
            "validation": [False],
        }
        return pd.DataFrame(record)

    weight_file = "toy_diagnostic_weights.h5"
    log_csv = tmp_path / "diagnostics.csv"
    calibrate_local_areas(
        dataset=dataset,
        matrix_fn=matrix_fn,
        national_matrix_fn=national_matrix_fn,
        area_count=2,
        weight_file=weight_file,
        dataset_key="2025",
        epochs=1,
        log_csv=log_csv,
        get_performance=get_performance,
        verbose=False,
    )

    with h5py.File(tmp_path / weight_file, "r") as f:
        saved_weights = f["2025"][:]
    # Households 1 and 3 started at zero weight; they must have gained mass.
    assert saved_weights[:, [1, 3]].sum() > 0

    first_row = pd.read_csv(log_csv).iloc[0]
    assert first_row["target_name"] == "UK/national_total"
    for loss_column in ("loss", "training_loss", "saved_weights_loss"):
        assert np.isfinite(first_row[loss_column])
    assert first_row["initial_zero_weight_rows"] == 2
    assert first_row["initial_zero_weight_prior_share"] == pytest.approx(0.5)
    assert first_row["household_is_spi_synthetic_rows"] == 2
    assert first_row["household_is_spi_synthetic_prior_share"] == pytest.approx(0.5)
    assert first_row["household_is_spi_synthetic_household_weight"] > 0
0 commit comments