22import numpy as np
33import pytest
44
5+ from policyengine_us_data .datasets .puf import puf as puf_module
56from policyengine_us_data .datasets .puf .puf import (
67 PUF ,
78 QBI_SIMULATION_VERSION ,
@@ -14,6 +15,19 @@ def _mark_current_qbi_simulation(file_handle):
1415 file_handle .attrs [QBI_SIMULATION_VERSION_ATTR ] = QBI_SIMULATION_VERSION
1516
1617
def _write_capital_gains_basis_source_file(path):
    """Create a small HDF5 fixture at ``path`` for the capital-gains tests.

    The file holds four persons spread over two tax units and two
    households, plus a ``long_term_capital_gains`` column containing a
    positive, a negative, a zero and another positive value. No basis or
    years-held columns are written.

    Args:
        path: Location of the HDF5 file to create (opened in ``"w"`` mode,
            so any existing file is overwritten).
    """
    columns = {
        "person_id": np.array([1, 2, 3, 4]),
        "person_tax_unit_id": np.array([1, 1, 2, 2]),
        "person_household_id": np.array([1, 1, 2, 2]),
        "household_id": np.array([1, 2]),
        "household_weight": np.array([100.0, 200.0]),
        "long_term_capital_gains": np.array([100.0, -40.0, 0.0, 200.0]),
    }
    with h5py.File(path, "w") as file_handle:
        for dataset_name, values in columns.items():
            file_handle.create_dataset(dataset_name, data=values)
29+
30+
1731@pytest .mark .skip (reason = "This test requires private data." )
1832@pytest .mark .parametrize ("year" , [2015 ])
1933def test_irs_puf_generates (year : int ):
@@ -50,6 +64,78 @@ def test_puf_person_split_keeps_capital_gains_holding_period_collapsed():
5064 )
5165
5266
def test_puf_load_dataset_backfills_capital_gains_basis_inputs(
    tmp_path,
    monkeypatch,
):
    """``load_dataset`` supplies basis and years-held columns and stores them.

    The source fixture only contains ``long_term_capital_gains``; after
    loading, both derived columns must be positive wherever gains are
    non-zero, zero wherever gains are zero, and present in the HDF5 file.
    """

    def _always_available(*variables):
        return True

    # Stub the variable-availability check so it always reports True.
    # NOTE(review): presumably this gates the backfill on the installed
    # policyengine_us version -- confirm against the puf module.
    monkeypatch.setattr(
        puf_module,
        "has_policyengine_us_variables",
        _always_available,
    )

    class DummyPUF(PUF):
        label = "Dummy PUF"
        name = "dummy_puf"
        time_period = 2024
        file_path = tmp_path / "dummy_puf.h5"

    _write_capital_gains_basis_source_file(DummyPUF.file_path)

    loaded = DummyPUF().load_dataset()

    basis = loaded["long_term_capital_gains_basis"]
    years = loaded["long_term_capital_gains_years_held"]
    gains = loaded["long_term_capital_gains"]

    has_gain = gains != 0
    no_gain = gains == 0

    # Rows with a gain or loss need strictly positive basis and holding period.
    for derived in (basis, years):
        assert np.all(derived[has_gain] > 0)
    # Rows without any gain must stay at exactly zero.
    for derived in (basis, years):
        assert np.all(derived[no_gain] == 0)

    # The derived columns must also have been written back to disk.
    with h5py.File(DummyPUF.file_path, "r") as file_handle:
        for dataset_name in (
            "long_term_capital_gains_basis",
            "long_term_capital_gains_years_held",
        ):
            assert dataset_name in file_handle
99+
100+
def test_puf_load_key_backfills_read_only_capital_gains_basis_inputs(
    tmp_path,
    monkeypatch,
):
    """Keyed ``load`` returns basis and years-held data from a read-only file.

    The fixture file is made read-only (mode ``0o444``) before loading.
    Both derived columns must still be retrievable by key, the reader
    returned by ``load()`` with no key must expose the same basis values,
    and the values must be positive for the rows with non-zero gains
    (indices 0, 1, 3) and zero for the row with no gain (index 2).
    """
    # Stub the variable-availability check so it always reports True.
    monkeypatch.setattr(
        puf_module,
        "has_policyengine_us_variables",
        lambda *variables: True,
    )

    class DummyPUF(PUF):
        label = "Dummy PUF"
        name = "dummy_puf"
        time_period = 2024
        file_path = tmp_path / "dummy_puf.h5"

    _write_capital_gains_basis_source_file(DummyPUF.file_path)
    DummyPUF.file_path.chmod(0o444)

    try:
        # Constructed inside the guarded region so the permissions are
        # restored even if instantiation itself fails.
        dataset = DummyPUF()
        basis = dataset.load("long_term_capital_gains_basis")
        years = dataset.load("long_term_capital_gains_years_held")
        reader = dataset.load()
        try:
            np.testing.assert_array_equal(
                reader["long_term_capital_gains_basis"],
                basis,
            )
        finally:
            # Close the handle even when the comparison raises, so a
            # failing assertion does not leak an open file.
            reader.close()
    finally:
        # Make the file writable again so later cleanup can remove it.
        DummyPUF.file_path.chmod(0o644)

    assert np.all(basis[[0, 1, 3]] > 0)
    assert basis[2] == 0
    assert np.all(years[[0, 1, 3]] > 0)
    assert years[2] == 0
137+
138+
53139def test_puf_load_dataset_backfills_sstb_split_inputs (tmp_path ):
54140 class DummyPUF (PUF ):
55141 label = "Dummy PUF"
0 commit comments