@@ -40,6 +40,7 @@ def create_constituency_target_matrix(
4040 INCOME_VARIABLES = [
4141 "total_income" ,
4242 "self_employment_income" ,
43+ "employment_income" ,
4344 ]
4445
4546 for income_variable in INCOME_VARIABLES :
@@ -81,31 +82,84 @@ def create_constituency_target_matrix(
8182 employment_incomes .employment_income_lower_bound .sort_values ().unique ()
8283 ) + [np .inf ]
8384
85+ employment_incomes_all = (
86+ employment_incomes .groupby ("code" )[
87+ ["employment_income_count" , "employment_income_amount" ]
88+ ]
89+ .sum ()
90+ .reset_index ()
91+ )
92+
93+ hmrc_all_count_target = incomes ["employment_income_count" ].values
94+ ons_all_count_target = employment_incomes_all [
95+ "employment_income_count"
96+ ].values
97+ count_scaling_factors = hmrc_all_count_target / ons_all_count_target
98+
99+ hmrc_all_amount_target = incomes ["employment_income_amount" ].values
100+ ons_all_amount_target = employment_incomes_all [
101+ "employment_income_amount"
102+ ].values
103+ amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target
104+
84105 for lower_bound , upper_bound in zip (bounds [:- 1 ], bounds [1 :]):
85- if lower_bound < 12_570 or upper_bound > 70_000 :
106+ if (
107+ lower_bound <= 15_000
108+ ): # Skip some targets with very small sample sizes
109+ continue
110+ if upper_bound >= 200_000 :
111+ continue
112+ count_target = (
113+ employment_incomes [
114+ (
115+ employment_incomes .employment_income_lower_bound
116+ == lower_bound
117+ )
118+ & (
119+ employment_incomes .employment_income_upper_bound
120+ == upper_bound
121+ )
122+ ].employment_income_count .values
123+ * count_scaling_factors
124+ )
125+
126+ amount_target = (
127+ employment_incomes [
128+ (
129+ employment_incomes .employment_income_lower_bound
130+ == lower_bound
131+ )
132+ & (
133+ employment_incomes .employment_income_upper_bound
134+ == upper_bound
135+ )
136+ ].employment_income_amount .values
137+ * amount_scaling_factors
138+ )
139+
140+ if count_target .mean () < 200 :
141+ print (
142+ f"Skipping employment income band { lower_bound } to { upper_bound } due to low count target mean: { count_target .mean ()} "
143+ )
144+ continue
145+
146+ if amount_target .mean () < 200 * 30e3 :
147+ print (
148+ f"Skipping employment income band { lower_bound } to { upper_bound } due to low amount target mean: { amount_target .mean ()} "
149+ )
86150 continue
151+
87152 in_bound = (
88153 (employment_income >= lower_bound )
89154 & (employment_income < upper_bound )
90155 & (employment_income != 0 )
91156 & (age >= 16 )
92157 )
93158 band_str = f"{ lower_bound } _{ upper_bound } "
94- matrix [f"hmrc/employment_income/count/{ band_str } " ] = sim .map_result (
95- in_bound , "person" , "household"
96- )
97- y [f"hmrc/employment_income/count/{ band_str } " ] = employment_incomes [
98- (employment_incomes .employment_income_lower_bound == lower_bound )
99- & (employment_incomes .employment_income_upper_bound == upper_bound )
100- ].employment_income_count .values
101-
102159 matrix [f"hmrc/employment_income/amount/{ band_str } " ] = sim .map_result (
103160 employment_income * in_bound , "person" , "household"
104161 )
105- y [f"hmrc/employment_income/amount/{ band_str } " ] = employment_incomes [
106- (employment_incomes .employment_income_lower_bound == lower_bound )
107- & (employment_incomes .employment_income_upper_bound == upper_bound )
108- ].employment_income_amount .values
162+ y [f"hmrc/employment_income/amount/{ band_str } " ] = amount_target
109163
110164 if uprate :
111165 y = uprate_targets (y , time_period )
@@ -128,7 +182,6 @@ def create_constituency_target_matrix(
128182 household_countries = sim .calculate ("country" ).values ,
129183 codes = const_2024 .code ,
130184 )
131-
132185 return matrix , y , country_mask
133186
134187
0 commit comments