@@ -109,6 +109,15 @@ def _fake_tax_benefit_system():
109109 )
110110
111111
def _fake_variable(entity_key, *, formulas=None, adds=None, subtracts=None):
    """Build a lightweight stand-in for a tax-benefit-system variable.

    Args:
        entity_key: Value stored on the stub's ``entity.key`` attribute.
        formulas: Optional object exposed as ``formulas``. Any falsy value
            (including ``None``) is replaced by a new empty dict.
        adds: Value exposed unchanged as ``adds`` (``None`` by default).
        subtracts: Value exposed unchanged as ``subtracts`` (``None`` by
            default).

    Returns:
        A ``SimpleNamespace`` carrying ``entity``, ``formulas``, ``adds`` and
        ``subtracts`` attributes.
    """
    return SimpleNamespace(
        entity=SimpleNamespace(key=entity_key),
        # A fresh dict per call keeps stubs from sharing one default mapping.
        formulas=formulas or {},
        adds=adds,
        subtracts=subtracts,
    )
119+
120+
112121def _write_h5 (path , datasets : dict [str , np .ndarray ]) -> None :
113122 with h5py .File (path , "w" ) as h5_file :
114123 for name , values in datasets .items ():
@@ -151,11 +160,12 @@ def patch_contract_validation(monkeypatch):
151160 monkeypatch .setattr (
152161 upload_module ,
153162 "validate_dataset_contract" ,
154- lambda file_path : validate_dataset_contract (
163+ lambda file_path , ** kwargs : validate_dataset_contract (
155164 file_path ,
156165 tax_benefit_system = _fake_tax_benefit_system (),
157166 microsimulation_cls = _FakeMicrosimulation ,
158167 dataset_loader = lambda path : path ,
168+ ** kwargs ,
159169 ),
160170 )
161171
@@ -221,6 +231,80 @@ def test_validate_dataset_infers_time_period_for_flat_h5(tmp_path, monkeypatch):
221231 assert _TimePeriodCheckingAggregateMicrosimulation .last_dataset .time_period == 2024
222232
223233
def test_validate_cps_allows_source_computed_policyengine_variables(
    tmp_path,
    monkeypatch,
):
    """``validate_dataset`` accepts a ``cps_2024.h5`` storing an ``adds`` variable.

    The file stores ``employment_income`` while the fake tax-benefit system
    registers that same variable with an ``adds`` list. The test passes when
    validation finishes without raising.
    """
    file_path = tmp_path / "cps_2024.h5"
    _write_h5(
        file_path,
        {
            "person_id": np.array([101], dtype=np.int32),
            "household_id": np.array([201], dtype=np.int32),
            "employment_income": np.array([50_000.0], dtype=np.float32),
            "household_weight": np.array([1.0], dtype=np.float32),
        },
    )

    # Register the stored variable as one defined through ``adds``.
    tbs = _fake_tax_benefit_system()
    tbs.variables["employment_income"] = _fake_variable(
        "person",
        adds=["employment_income_before_lsr"],
    )

    # Route the upload module's contract check through the fake system and
    # fake microsimulation, forwarding any keyword arguments it receives.
    monkeypatch.setattr(
        upload_module,
        "validate_dataset_contract",
        lambda file_path, **kwargs: validate_dataset_contract(
            file_path,
            tax_benefit_system=tbs,
            microsimulation_cls=_FakeMicrosimulation,
            dataset_loader=lambda path: path,
            **kwargs,
        ),
    )
    monkeypatch.setattr(
        "policyengine_us.Microsimulation",
        _TimePeriodCheckingAggregateMicrosimulation,
    )

    # No explicit assertion: the test fails only if this call raises.
    validate_dataset(file_path)
270+
271+
def test_validate_enhanced_cps_rejects_computed_policyengine_variables(
    tmp_path,
    monkeypatch,
):
    """``validate_dataset`` rejects an ``enhanced_cps_2024.h5`` in this setup.

    The fake tax-benefit system registers ``employment_income`` with an
    ``adds`` list, and validation must raise ``DatasetValidationError`` with a
    message matching ``employment_income``.
    """
    file_path = tmp_path / "enhanced_cps_2024.h5"
    # NOTE(review): presumably these datasets include ``employment_income``;
    # the helper is defined outside this view, so confirm against it.
    _write_h5(file_path, _minimal_enhanced_cps_contract_datasets())

    tbs = _fake_tax_benefit_system()
    tbs.variables["employment_income"] = _fake_variable(
        "person",
        adds=["employment_income_before_lsr"],
    )

    # Replace the per-filename required-variable table with an empty mapping.
    # NOTE(review): presumably so a missing-variable check cannot be the
    # source of the expected error; confirm.
    monkeypatch.setattr(
        upload_module,
        "REQUIRED_VARIABLES_BY_FILENAME",
        {},
    )

    # Route the upload module's contract check through the fake system and
    # fake microsimulation, forwarding any keyword arguments it receives.
    monkeypatch.setattr(
        upload_module,
        "validate_dataset_contract",
        lambda file_path, **kwargs: validate_dataset_contract(
            file_path,
            tax_benefit_system=tbs,
            microsimulation_cls=_FakeMicrosimulation,
            dataset_loader=lambda path: path,
            **kwargs,
        ),
    )
    monkeypatch.setattr(
        "policyengine_us.Microsimulation",
        _TimePeriodCheckingAggregateMicrosimulation,
    )

    with pytest.raises(DatasetValidationError, match="employment_income"):
        validate_dataset(file_path)
306+
307+
224308def test_validate_dataset_rejects_temporary_reported_source_variables (
225309 tmp_path ,
226310 monkeypatch ,
0 commit comments