@@ -75,31 +75,84 @@ def create_local_authority_target_matrix(
7575 employment_incomes .employment_income_lower_bound .sort_values ().unique ()
7676 ) + [np .inf ]
7777
78+ employment_incomes_all = (
79+ employment_incomes .groupby ("code" )[
80+ ["employment_income_count" , "employment_income_amount" ]
81+ ]
82+ .sum ()
83+ .reset_index ()
84+ )
85+
86+ hmrc_all_count_target = incomes ["employment_income_count" ].values
87+ ons_all_count_target = employment_incomes_all [
88+ "employment_income_count"
89+ ].values
90+ count_scaling_factors = hmrc_all_count_target / ons_all_count_target
91+
92+ hmrc_all_amount_target = incomes ["employment_income_amount" ].values
93+ ons_all_amount_target = employment_incomes_all [
94+ "employment_income_amount"
95+ ].values
96+ amount_scaling_factors = hmrc_all_amount_target / ons_all_amount_target
97+
7898 for lower_bound , upper_bound in zip (bounds [:- 1 ], bounds [1 :]):
79- if lower_bound >= 70_000 or lower_bound < 12_570 :
99+ if (
100+ lower_bound <= 15_000
101+ ): # Skip some targets with very small sample sizes
80102 continue
103+ if upper_bound >= 200_000 :
104+ continue
105+ count_target = (
106+ employment_incomes [
107+ (
108+ employment_incomes .employment_income_lower_bound
109+ == lower_bound
110+ )
111+ & (
112+ employment_incomes .employment_income_upper_bound
113+ == upper_bound
114+ )
115+ ].employment_income_count .values
116+ * count_scaling_factors
117+ )
118+
119+ amount_target = (
120+ employment_incomes [
121+ (
122+ employment_incomes .employment_income_lower_bound
123+ == lower_bound
124+ )
125+ & (
126+ employment_incomes .employment_income_upper_bound
127+ == upper_bound
128+ )
129+ ].employment_income_amount .values
130+ * amount_scaling_factors
131+ )
132+
133+ if count_target .mean () < 200 :
134+ print (
135+ f"Skipping employment income band { lower_bound } to { upper_bound } due to low count target mean: { count_target .mean ()} "
136+ )
137+ continue
138+
139+ if amount_target .mean () < 200 * 30e3 :
140+ print (
141+ f"Skipping employment income band { lower_bound } to { upper_bound } due to low amount target mean: { amount_target .mean ()} "
142+ )
143+ continue
144+
81145 in_bound = (
82146 (employment_income >= lower_bound )
83147 & (employment_income < upper_bound )
84148 & (employment_income != 0 )
85149 & (age >= 16 )
86150 )
87151 band_str = f"{ lower_bound } _{ upper_bound } "
88- matrix [f"hmrc/employment_income/count/{ band_str } " ] = sim .map_result (
89- in_bound , "person" , "household"
90- )
91- y [f"hmrc/employment_income/count/{ band_str } " ] = employment_incomes [
92- (employment_incomes .employment_income_lower_bound == lower_bound )
93- & (employment_incomes .employment_income_upper_bound == upper_bound )
94- ].employment_income_count .values
95-
96152 matrix [f"hmrc/employment_income/amount/{ band_str } " ] = sim .map_result (
97153 employment_income * in_bound , "person" , "household"
98154 )
99- y [f"hmrc/employment_income/amount/{ band_str } " ] = employment_incomes [
100- (employment_incomes .employment_income_lower_bound == lower_bound )
101- & (employment_incomes .employment_income_upper_bound == upper_bound )
102- ].employment_income_amount .values
155+ y [f"hmrc/employment_income/amount/{ band_str } " ] = amount_target
103156
104157 if uprate :
105158 y = uprate_targets (y , time_period )
0 commit comments