From 9bce4f2f39109b4525a017a06fe53c63523a163f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 10:26:33 +0100 Subject: [PATCH 1/7] Fix SSMG uprating --- changelog_entry.yaml | 4 ++++ policyengine_uk_data/datasets/frs/frs.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..01d4561b2 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - SSMG uprating. diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index ea092d7e0..49720c3e2 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -725,6 +725,10 @@ def add_benefit_income( np.array(frs["winter_fuel_allowance_reported"]) / 52 ) # This is not weeklyised by default (paid once per year) + frs["ssmg_reported"] = ( + np.array(frs["ssmg_reported"]) / 52 + ) + frs["statutory_sick_pay"] = person.SSPADJ * 52 frs["statutory_maternity_pay"] = person.SMPADJ * 52 From 3995ec624f8d8982d95a212f3ce961bc826ee2ce Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 10:42:45 +0100 Subject: [PATCH 2/7] Remove change --- policyengine_uk_data/datasets/frs/frs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index 49720c3e2..ea092d7e0 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -725,10 +725,6 @@ def add_benefit_income( np.array(frs["winter_fuel_allowance_reported"]) / 52 ) # This is not weeklyised by default (paid once per year) - frs["ssmg_reported"] = ( - np.array(frs["ssmg_reported"]) / 52 - ) - frs["statutory_sick_pay"] = person.SSPADJ * 52 frs["statutory_maternity_pay"] = person.SMPADJ * 52 From 6f1844d4c03134d01264b075944567fa3c28698a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 11:40:53 +0100 Subject: 
[PATCH 3/7] Add safety check to cg imputation --- policyengine_uk_data/utils/imputations/capital_gains.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py index 536c7d18b..51d15b417 100644 --- a/policyengine_uk_data/utils/imputations/capital_gains.py +++ b/policyengine_uk_data/utils/imputations/capital_gains.py @@ -127,6 +127,11 @@ def loss(blend_factor): upper = row.maximum_total_income ti_in_range = (ti >= lower) * (ti < upper) in_target_range = has_cg * ti_in_range + if not in_target_range.any(): + print( + f"Skipping capital gains for income range {lower} to {upper}" + ) + continue quantiles = np.random.random(int(in_target_range.sum())) pred_capital_gains = spline(quantiles) new_cg[in_target_range] = pred_capital_gains From c36cff37af9963b2f9252d25696b4d509686043a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 11:53:25 +0100 Subject: [PATCH 4/7] Print --- policyengine_uk_data/utils/imputations/capital_gains.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py index 51d15b417..673981477 100644 --- a/policyengine_uk_data/utils/imputations/capital_gains.py +++ b/policyengine_uk_data/utils/imputations/capital_gains.py @@ -127,12 +127,13 @@ def loss(blend_factor): upper = row.maximum_total_income ti_in_range = (ti >= lower) * (ti < upper) in_target_range = has_cg * ti_in_range + print(in_target_range.any(), in_target_range.sum()) if not in_target_range.any(): print( f"Skipping capital gains for income range {lower} to {upper}" ) continue - quantiles = np.random.random(int(in_target_range.sum())) + quantiles = np.random.random(int(in_target_range.values.sum())) pred_capital_gains = spline(quantiles) new_cg[in_target_range] = pred_capital_gains From 08cdfaf02c6decc457e1451d5852614e6829fc5d Mon Sep 
17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 12:03:20 +0100 Subject: [PATCH 5/7] Cast capital gains target mask to boolean --- policyengine_uk_data/utils/imputations/capital_gains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py index 673981477..9abcafc00 100644 --- a/policyengine_uk_data/utils/imputations/capital_gains.py +++ b/policyengine_uk_data/utils/imputations/capital_gains.py @@ -126,7 +126,7 @@ def loss(blend_factor): lower = row.minimum_total_income upper = row.maximum_total_income ti_in_range = (ti >= lower) * (ti < upper) - in_target_range = has_cg * ti_in_range + in_target_range = has_cg * ti_in_range > 0 print(in_target_range.any(), in_target_range.sum()) if not in_target_range.any(): print( From 43c9c0ad0508e82df314139656360a3d7986c2ad Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 2025 12:58:27 +0100 Subject: [PATCH 6/7] Correctly cast data type --- policyengine_uk_data/utils/imputations/capital_gains.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/policyengine_uk_data/utils/imputations/capital_gains.py b/policyengine_uk_data/utils/imputations/capital_gains.py index 9abcafc00..970b75586 100644 --- a/policyengine_uk_data/utils/imputations/capital_gains.py +++ b/policyengine_uk_data/utils/imputations/capital_gains.py @@ -127,12 +127,6 @@ def loss(blend_factor): upper = row.maximum_total_income ti_in_range = (ti >= lower) * (ti < upper) in_target_range = has_cg * ti_in_range > 0 - print(in_target_range.any(), in_target_range.sum()) - if not in_target_range.any(): - print( - f"Skipping capital gains for income range {lower} to {upper}" - ) - continue quantiles = np.random.random(int(in_target_range.values.sum())) pred_capital_gains = spline(quantiles) new_cg[in_target_range] = pred_capital_gains From 523e48d0fb9df8510617ce45e1cac38d08b0c10a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 10 Jul 
2025 13:08:23 +0100 Subject: [PATCH 7/7] Add student loan repayments --- policyengine_uk_data/datasets/frs/frs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index ea092d7e0..bf11e46cf 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -573,6 +573,8 @@ def add_market_income( frs["lump_sum_income"] = person.REDAMT + frs["student_loan_repayments"] = person.SLREPAMT * 52 + def sum_from_positive_fields( table: pd.DataFrame, fields: List[str]