@@ -140,6 +140,31 @@ def create_income_model(overwrite_existing: bool = False):
140140 return save_imputation_models ()
141141
142142
def impute_over_incomes(
    dataset: UKSingleYearDataset, model, output_variables: list[str]
) -> UKSingleYearDataset:
    """
    Overwrite selected person-level income variables with model predictions.

    The function works on the object returned by ``dataset.copy()`` and
    returns that copy with the requested columns of its ``person`` table
    replaced.

    Args:
        dataset: PolicyEngine UK dataset to augment with income data.
        model: Trained imputation model. It must expose
            ``predict(input_df)`` and return an object whose columns can be
            looked up by each name in ``output_variables``.
        output_variables: Names of the income components to impute. Only
            these columns are written; any other columns the model
            predicts are ignored.

    Returns:
        The copied dataset with the imputed income components written to
        its ``person`` table.
    """
    dataset = dataset.copy()

    # The model's predictors are calculated from the dataset itself.
    input_df = Microsimulation(dataset=dataset).calculate_dataframe(
        ["age", "gender", "region"]
    )
    output_df = model.predict(input_df)

    for column in output_variables:
        # Missing predictions become 0. ``.values`` drops the index, so the
        # assignment is positional rather than index-aligned.
        # NOTE(review): this presumes predictions come back in the same row
        # order as ``dataset.person`` -- confirm against the model wrapper.
        dataset.person[column] = output_df[column].fillna(0).values

    return dataset
166+
167+
143168def impute_income (dataset : UKSingleYearDataset ) -> UKSingleYearDataset :
144169 """
145170 Impute detailed income components using trained model.
@@ -161,16 +186,23 @@ def impute_income(dataset: UKSingleYearDataset) -> UKSingleYearDataset:
161186 zero_weight_copy = subsample_dataset (zero_weight_copy , 10_000 )
162187
163188 model = create_income_model ()
164- sim = Microsimulation (dataset = zero_weight_copy )
165189
166- input_df = sim . calculate_dataframe ([ "age" , "gender" , "region" ])
190+ # Impute just dividends on the original, full variable set on the copy
167191
168- output_df = model .predict (input_df )
192+ zero_weight_copy = impute_over_incomes (
193+ zero_weight_copy ,
194+ model ,
195+ IMPUTATIONS ,
196+ )
169197
170- for column in output_df .columns :
171- zero_weight_copy .person [column ] = output_df [column ].fillna (0 ).values
198+ dataset = impute_over_incomes (
199+ dataset ,
200+ model ,
201+ ["dividend_income" ],
202+ )
172203
173204 zero_weight_copy .validate ()
205+ dataset .validate ()
174206
175207 data = stack_datasets (
176208 dataset ,
0 commit comments