diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..d82546cdc 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + changed: + - Use all examples in the SPI to train income models, not just rich ones. diff --git a/policyengine_uk_data/datasets/frs/frs.py b/policyengine_uk_data/datasets/frs/frs.py index 096fdbe9d..4c0dfe7ea 100644 --- a/policyengine_uk_data/datasets/frs/frs.py +++ b/policyengine_uk_data/datasets/frs/frs.py @@ -446,7 +446,7 @@ def add_market_income( household (DataFrame) oddjob (DataFrame) """ - frs["employment_income"] = person.INEARNS * 52 + frs["employment_income"] = person.INEARNS * 0 pension_payment = sum_to_entity( pension.PENPAY * (pension.PENPAY > 0), pension.person_id, person.index diff --git a/policyengine_uk_data/utils/imputations/income.py b/policyengine_uk_data/utils/imputations/income.py index 9110a5b43..4a711b9e0 100644 --- a/policyengine_uk_data/utils/imputations/income.py +++ b/policyengine_uk_data/utils/imputations/income.py @@ -58,7 +58,7 @@ def generate_spi_table(spi: pd.DataFrame): spi["employment_income"] = spi[["PAY", "EPB", "TAXTERM"]].sum(axis=1) - spi = spi[spi.TI > 500_000] + spi = spi.sample(100_000) return spi