@@ -32,25 +32,44 @@ def create_local_authority_target_matrix(
3232 y = pd .DataFrame ()
3333
3434 INCOME_VARIABLES = [
35- "total_income" ,
3635 "self_employment_income" ,
36+ "employment_income" ,
3737 ]
3838
39+ national_incomes = pd .read_csv (STORAGE_FOLDER / "incomes_projection.csv" )
40+ national_incomes = national_incomes [national_incomes .year == 2025 ]
41+
3942 for income_variable in INCOME_VARIABLES :
4043 income_values = sim .calculate (income_variable ).values
4144 in_spi_frame = sim .calculate ("income_tax" ).values > 0
4245 matrix [f"hmrc/{ income_variable } /amount" ] = sim .map_result (
4346 income_values * in_spi_frame , "person" , "household"
4447 )
45- y [f"hmrc/{ income_variable } /amount" ] = incomes [
46- f"{ income_variable } _amount"
47- ].values
48+ local_targets = incomes [f"{ income_variable } _amount" ].values
49+ local_target_sum = local_targets .sum ()
50+ national_target = national_incomes [
51+ (national_incomes .total_income_lower_bound == 12_570 )
52+ & (national_incomes .total_income_upper_bound == np .inf )
53+ ][income_variable + "_amount" ].iloc [0 ]
54+ national_consistency_adjustment_factor = (
55+ national_target / local_target_sum
56+ )
57+ y [f"hmrc/{ income_variable } /amount" ] = (
58+ local_targets * national_consistency_adjustment_factor
59+ )
4860 matrix [f"hmrc/{ income_variable } /count" ] = sim .map_result (
4961 (income_values != 0 ) * in_spi_frame , "person" , "household"
5062 )
51- y [f"hmrc/{ income_variable } /count" ] = incomes [
52- f"{ income_variable } _count"
53- ].values
63+ local_targets = incomes [f"{ income_variable } _count" ].values
64+ local_target_sum = local_targets .sum ()
65+ national_target = national_incomes [
66+ (national_incomes .total_income_lower_bound == 12_570 )
67+ & (national_incomes .total_income_upper_bound == np .inf )
68+ ][income_variable + "_count" ].iloc [0 ]
69+ y [f"hmrc/{ income_variable } /count" ] = (
70+ incomes [f"{ income_variable } _count" ].values
71+ * national_consistency_adjustment_factor
72+ )
5473
5574 age = sim .calculate ("age" ).values
5675 for lower_age in range (0 , 80 , 10 ):
@@ -75,61 +94,30 @@ def create_local_authority_target_matrix(
7594 employment_incomes .employment_income_lower_bound .sort_values ().unique ()
7695 ) + [np .inf ]
7796
78- employment_incomes_all = (
79- employment_incomes .groupby ("code" )[
80- ["employment_income_count" , "employment_income_amount" ]
81- ]
82- .sum ()
83- .reset_index ()
84- )
85-
86- hmrc_all_count_target = incomes ["employment_income_count" ].values
87- ons_all_count_target = employment_incomes_all [
88- "employment_income_count"
89- ].values
90- count_scaling_factors = hmrc_all_count_target / ons_all_count_target
91-
92- hmrc_all_amount_target = incomes ["employment_income_amount" ].values
93- ons_all_amount_target = employment_incomes_all [
94- "employment_income_amount"
95- ].values
96- amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target
97-
9897 for lower_bound , upper_bound in zip (bounds [:- 1 ], bounds [1 :]):
9998 if (
10099 lower_bound <= 15_000
101100 ): # Skip some targets with very small sample sizes
102101 continue
103102 if upper_bound >= 200_000 :
104103 continue
105- count_target = (
106- employment_incomes [
107- (
108- employment_incomes .employment_income_lower_bound
109- == lower_bound
110- )
111- & (
112- employment_incomes .employment_income_upper_bound
113- == upper_bound
114- )
115- ].employment_income_count .values
116- * count_scaling_factors
117- )
118104
119- amount_target = (
120- employment_incomes [
121- (
122- employment_incomes .employment_income_lower_bound
123- == lower_bound
124- )
125- & (
126- employment_incomes .employment_income_upper_bound
127- == upper_bound
128- )
129- ].employment_income_amount .values
130- * amount_scaling_factors
131- )
105+ national_data_row = national_incomes [
106+ national_incomes .total_income_lower_bound == lower_bound
107+ ]["employment_income_amount" ].iloc [0 ]
108+
109+ count_target = employment_incomes [
110+ (employment_incomes .employment_income_lower_bound == lower_bound )
111+ & (employment_incomes .employment_income_upper_bound == upper_bound )
112+ ].employment_income_count .values
132113
114+ amount_target = employment_incomes [
115+ (employment_incomes .employment_income_lower_bound == lower_bound )
116+ & (employment_incomes .employment_income_upper_bound == upper_bound )
117+ ].employment_income_amount .values
118+ sum_of_local_area_values = amount_target .sum ()
119+
120+ adjustment = national_data_row / sum_of_local_area_values
133121 if count_target .mean () < 200 :
134122 print (
135123 f"Skipping employment income band { lower_bound } to { upper_bound } due to low count target mean: { count_target .mean ()} "
@@ -152,7 +140,9 @@ def create_local_authority_target_matrix(
152140 matrix [f"hmrc/employment_income/amount/{ band_str } " ] = sim .map_result (
153141 employment_income * in_bound , "person" , "household"
154142 )
155- y [f"hmrc/employment_income/amount/{ band_str } " ] = amount_target
143+ y [f"hmrc/employment_income/amount/{ band_str } " ] = (
144+ amount_target * adjustment
145+ )
156146
157147 if uprate :
158148 y = uprate_targets (y , time_period )
@@ -221,30 +211,7 @@ def uprate_targets(y: pd.DataFrame, target_year: int = 2025) -> pd.DataFrame:
221211 uprating_from_2020 [is_uprated_from_2020 ] = rel_change_20_final [
222212 is_uprated_from_2020
223213 ]
224-
225- rel_change_21_final = (weights_final @ matrix_final ) / (
226- weights_21 @ matrix_21
227- ) - 1
228- is_uprated_from_2021 = [
229- col .startswith ("hmrc/" ) for col in matrix_21 .columns
230- ]
231- uprating_from_2021 = np .zeros_like (matrix_21 .columns , dtype = float )
232- uprating_from_2021 [is_uprated_from_2021 ] = rel_change_21_final [
233- is_uprated_from_2021
234- ]
235-
236- rel_change_23_final = (weights_final @ matrix_final ) / (
237- weights_23 @ matrix_23
238- ) - 1
239- is_uprated_from_2023 = [
240- col .startswith ("hmrc/" ) for col in matrix_23 .columns
241- ]
242- uprating_from_2023 = np .zeros_like (matrix_23 .columns , dtype = float )
243- uprating_from_2023 [is_uprated_from_2023 ] = rel_change_23_final [
244- is_uprated_from_2023
245- ]
246-
247- uprating = uprating_from_2020 + uprating_from_2021 + uprating_from_2023
214+ uprating = uprating_from_2020
248215 y = y * (1 + uprating )
249216
250217 return y
0 commit comments