@@ -27,14 +27,20 @@ def _parse_uc_national_payment_dist():
2727 household_count = df .iloc [idx , col_idx ]
2828
2929 # Skip missing, ".." (suppressed), or zero values
30- if pd .isna (household_count ) or household_count == ".." or household_count == 0 :
30+ if (
31+ pd .isna (household_count )
32+ or household_count == ".."
33+ or household_count == 0
34+ ):
3135 continue
3236
33- data_rows .append ({
34- "monthly_award_band" : award_band ,
35- "family_type" : family_type ,
36- "household_count" : int (household_count )
37- })
37+ data_rows .append (
38+ {
39+ "monthly_award_band" : award_band ,
40+ "family_type" : family_type ,
41+ "household_count" : int (household_count ),
42+ }
43+ )
3844
3945 result_df = pd .DataFrame (data_rows )
4046
@@ -46,21 +52,30 @@ def parse_band(band):
4652 return float (parts [0 ]) * 12 , float (parts [1 ]) * 12
4753 return None , None
4854
49- result_df [["uc_annual_payment_min" , "uc_annual_payment_max" ]] = result_df ["monthly_award_band" ]. apply (
50- lambda x : pd . Series ( parse_band ( x ))
51- )
55+ result_df [["uc_annual_payment_min" , "uc_annual_payment_max" ]] = result_df [
56+ "monthly_award_band"
57+ ]. apply ( lambda x : pd . Series ( parse_band ( x )) )
5258
5359 # Map family types to constant names
5460 family_type_mapping = {
5561 "Single, no children" : "SINGLE" ,
5662 "Single, with children" : "LONE_PARENT" ,
5763 "Couple, no children" : "COUPLE_NO_CHILDREN" ,
58- "Couple, with children" : "COUPLE_WITH_CHILDREN"
64+ "Couple, with children" : "COUPLE_WITH_CHILDREN" ,
5965 }
60- result_df ["family_type" ] = result_df ["family_type" ].map (family_type_mapping )
66+ result_df ["family_type" ] = result_df ["family_type" ].map (
67+ family_type_mapping
68+ )
6169
6270 # Reorder columns and drop monthly band
63- result_df = result_df [["uc_annual_payment_min" , "uc_annual_payment_max" , "family_type" , "household_count" ]]
71+ result_df = result_df [
72+ [
73+ "uc_annual_payment_min" ,
74+ "uc_annual_payment_max" ,
75+ "family_type" ,
76+ "household_count" ,
77+ ]
78+ ]
6479
6580 return result_df
6681
@@ -80,43 +95,55 @@ def _parse_uc_pc_households():
8095 household_count = df_gb .iloc [idx , 3 ] # Column 3: household count
8196
8297 # Skip if empty, invalid, Total row, or Unknown
83- if pd .isna (constituency ) or pd .isna (household_count ) or constituency in ["Total" , "Unknown" ]:
98+ if (
99+ pd .isna (constituency )
100+ or pd .isna (household_count )
101+ or constituency in ["Total" , "Unknown" ]
102+ ):
84103 continue
85104
86- gb_data_rows .append ({
87- "constituency_name" : constituency ,
88- "household_count" : int (household_count )
89- })
105+ gb_data_rows .append (
106+ {
107+ "constituency_name" : constituency ,
108+ "household_count" : int (household_count ),
109+ }
110+ )
90111
91112 # Parse NI data
92113 ni_file_path = storage_path / "dfc-ni-uc-stats-supp-tables-may-2025.ods"
93- df_ni = pd .read_excel (ni_file_path , sheet_name = '5b' , engine = 'odf' , header = None )
114+ df_ni = pd .read_excel (
115+ ni_file_path , sheet_name = "5b" , engine = "odf" , header = None
116+ )
94117
95118 # Get constituency names from row 2, columns 1-18
96119 ni_constituencies = df_ni .iloc [2 , 1 :19 ].tolist ()
97120
98121 # Find May 2025 row
99- may_2025_row = df_ni [df_ni [0 ] == ' May 2025' ].iloc [0 ]
122+ may_2025_row = df_ni [df_ni [0 ] == " May 2025" ].iloc [0 ]
100123
101124 ni_data_rows = []
102125 for col_idx , constituency_name in enumerate (ni_constituencies , start = 1 ):
103126 household_count = may_2025_row [col_idx ]
104127
105128 if pd .notna (household_count ) and household_count != 0 :
106- ni_data_rows .append ({
107- "constituency_name" : constituency_name ,
108- "household_count" : int (household_count )
109- })
129+ ni_data_rows .append (
130+ {
131+ "constituency_name" : constituency_name ,
132+ "household_count" : int (household_count ),
133+ }
134+ )
110135
111136 # Combine GB and NI data
112137 result_df = pd .DataFrame (gb_data_rows + ni_data_rows )
113138
114139 # Scale constituency counts to match national total
115- national_total = _parse_uc_national_payment_dist ()[' household_count' ].sum ()
116- constituency_total = result_df [' household_count' ].sum ()
140+ national_total = _parse_uc_national_payment_dist ()[" household_count" ].sum ()
141+ constituency_total = result_df [" household_count" ].sum ()
117142 scaling_factor = national_total / constituency_total
118143
119- result_df ['household_count' ] = (result_df ['household_count' ] * scaling_factor ).round ().astype (int )
144+ result_df ["household_count" ] = (
145+ (result_df ["household_count" ] * scaling_factor ).round ().astype (int )
146+ )
120147
121148 return result_df
122149
0 commit comments