From f502fc1d5323b7d47a4f2703c60d60b640a5c8e0 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 28 Apr 2026 17:52:06 -0400 Subject: [PATCH 1/2] Derive own-right benefit receipt in FRS --- .../codex-frs-own-right-benefits.added.md | 1 + policyengine_uk_data/datasets/frs.py | 19 +++++++++++++++++++ .../tests/test_legacy_benefit_proxies.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 changelog.d/codex-frs-own-right-benefits.added.md diff --git a/changelog.d/codex-frs-own-right-benefits.added.md b/changelog.d/codex-frs-own-right-benefits.added.md new file mode 100644 index 000000000..f88aa63ea --- /dev/null +++ b/changelog.d/codex-frs-own-right-benefits.added.md @@ -0,0 +1 @@ +Added an FRS person-level `receives_benefits_in_own_right` input derived from reported UC, JSA, and ESA receipt. diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index 170f11b51..516bf0314 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -56,6 +56,13 @@ "disabled_students_allowance_course_eligible", "disabled_students_allowance_has_qualifying_condition", ) +BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS = ( + "universal_credit_reported", + "jsa_contrib_reported", + "jsa_income_reported", + "esa_contrib_reported", + "esa_income_reported", +) @lru_cache(maxsize=None) @@ -166,6 +173,15 @@ def derive_esa_support_group_proxy( ) +def derive_receives_benefits_in_own_right(pe_person: pd.DataFrame) -> pd.Series: + """Identify people reporting adult benefits that end QYP status.""" + + return ( + pe_person[list(BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS)].fillna(0).sum(axis=1) + > 0 + ) + + def add_legacy_benefit_proxies( pe_person: pd.DataFrame, employment_status_reported, @@ -923,6 +939,9 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) * WEEKS_IN_YEAR ) + pe_person["receives_benefits_in_own_right"] = derive_receives_benefits_in_own_right( + pe_person + ) pe_person["bsp_reported"] = ( sum_to_entity( diff --git a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py index d09e57ee1..c4b9cf0d7 100644 --- a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py +++ b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py @@ -11,6 +11,7 @@ derive_esa_health_condition_proxy, derive_esa_support_group_proxy, derive_legacy_jobseeker_proxy, + derive_receives_benefits_in_own_right, load_legacy_jobseeker_max_annual_hours, ) @@ -119,6 +120,22 @@ def test_esa_support_group_proxy_is_stricter_subset_of_health_proxy(): assert result.tolist() == [True, False, False, False, False] +def test_receives_benefits_in_own_right_uses_reported_adult_benefits(): + pe_person = pd.DataFrame( + { + "universal_credit_reported": [0, 1, 0, 0, 0, 0], + "jsa_contrib_reported": [0, 0, 1, 0, 0, 0], + "jsa_income_reported": [0, 0, 0, 1, 0, 0], + "esa_contrib_reported": [0, 0, 0, 0, 1, 0], + "esa_income_reported": [0, 0, 0, 0, 0, 1], + } + ) + + result = derive_receives_benefits_in_own_right(pe_person) + + assert result.tolist() == [False, True, True, True, True, True] + + def test_add_legacy_benefit_proxies_wires_all_three_columns(): pe_person = pd.DataFrame( { @@ -465,6 +482,7 @@ def fake_read_csv(path, *args, **kwargs): "legacy_jobseeker_proxy", "esa_health_condition_proxy", "esa_support_group_proxy", + "receives_benefits_in_own_right", "is_parent", }.issubset(dataset.person.columns) assert not dataset.person["is_parent"].iloc[0] From c8d2a10aa2f066c369421938b754590d6a2259ea Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 28 Apr 2026 21:18:32 -0400 Subject: [PATCH 2/2] Add FRS child QYP inputs --- .../codex-frs-own-right-benefits.added.md | 2 +- policyengine_uk_data/datasets/frs.py | 103 ++++++++++++++++++ .../tests/test_legacy_benefit_proxies.py | 71 ++++++++++++ 3 files changed, 175 insertions(+), 1 deletion(-) diff --git a/changelog.d/codex-frs-own-right-benefits.added.md b/changelog.d/codex-frs-own-right-benefits.added.md index f88aa63ea..e8104cd47 100644 --- a/changelog.d/codex-frs-own-right-benefits.added.md +++ b/changelog.d/codex-frs-own-right-benefits.added.md @@ -1 +1 @@ -Added an FRS person-level `receives_benefits_in_own_right` input derived from reported UC, JSA, and ESA receipt. +Added FRS person-level child and qualifying young person inputs for reported adult benefit receipt, current non-advanced education, approved training, education/training entry age, and the Universal Credit terminal-date proxy. diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index 516bf0314..f20dd08e8 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -63,6 +63,16 @@ "esa_contrib_reported", "esa_income_reported", ) +NON_ADVANCED_EDUCATION_LEVELS = ( + "PRE_PRIMARY", + "PRIMARY", + "LOWER_SECONDARY", + "UPPER_SECONDARY", + "POST_SECONDARY", +) +# FRS government-training question variants use 10 or 13 for "None of these". +FRS_APPROVED_TRAINING_CODES = tuple(range(1, 10)) +UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE = 1000 @lru_cache(maxsize=None) @@ -182,6 +192,79 @@ def derive_receives_benefits_in_own_right(pe_person: pd.DataFrame) -> pd.Series: ) +def derive_is_in_non_advanced_education( + current_education, + is_apprentice=None, +) -> np.ndarray: + """Identify current non-advanced education from PolicyEngine education states.""" + + current_education = np.asarray(current_education) + if is_apprentice is None: + is_apprentice = np.zeros(len(current_education), dtype=bool) + else: + is_apprentice = np.asarray(is_apprentice) + + return np.isin(current_education, NON_ADVANCED_EDUCATION_LEVELS) & ~is_apprentice + + +def derive_is_in_approved_training_from_frs_person( + person: pd.DataFrame, +) -> pd.Series: + """Identify reported government training scheme participation in FRS.""" + + if "train" not in person.columns: + return pd.Series(False, index=person.index) + + train = pd.to_numeric(person.train, errors="coerce").fillna(0) + return train.isin(FRS_APPROVED_TRAINING_CODES) + + +def derive_age_started_or_accepted_current_education_or_training( + age, + is_in_non_advanced_education, + is_in_approved_training, +) -> np.ndarray: + """Approximate the entry age for current QYP education or training. + + FRS observes current education/training status but not the age at which the + current course or programme was started, enrolled on, or accepted. For + people currently in qualifying education/training, cap the imputed entry + age at 18 so observed 19-year-olds remain eligible for rules requiring + entry before age 19. + """ + + age = np.asarray(age) + in_qualifying_education_or_training = np.asarray( + is_in_non_advanced_education + ) | np.asarray(is_in_approved_training) + + return np.where( + in_qualifying_education_or_training, + np.minimum(age, 18), + UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE, + ) + + +def derive_is_before_universal_credit_qualifying_young_person_terminal_date( + age, + is_in_non_advanced_education, + is_in_approved_training, +) -> np.ndarray: + """Approximate the UC terminal-date condition for observed 19-year-olds. + + FRS does not expose the date-of-birth and assessment-period detail needed + to identify the exact 1 September terminal date. Use current qualifying + education/training status as the microdata proxy for age-19 records. + """ + + age = np.asarray(age) + in_qualifying_education_or_training = np.asarray( + is_in_non_advanced_education + ) | np.asarray(is_in_approved_training) + + return (age == 19) & in_qualifying_education_or_training + + def add_legacy_benefit_proxies( pe_person: pd.DataFrame, employment_status_reported, @@ -574,6 +657,26 @@ def determine_education_level(fted_val, typeed2_val, age_val): [determine_education_level(f, t, a) for f, t, a in zip(fted, typeed2, age)], index=pe_person.index, ) + pe_person["is_in_non_advanced_education"] = derive_is_in_non_advanced_education( + pe_person.current_education + ) + pe_person["is_in_approved_training"] = ( + derive_is_in_approved_training_from_frs_person(person) + ) + pe_person["age_started_or_accepted_current_education_or_training"] = ( + derive_age_started_or_accepted_current_education_or_training( + age, + pe_person.is_in_non_advanced_education, + pe_person.is_in_approved_training, + ) + ) + pe_person["is_before_universal_credit_qualifying_young_person_terminal_date"] = ( + derive_is_before_universal_credit_qualifying_young_person_terminal_date( + age, + pe_person.is_in_non_advanced_education, + pe_person.is_in_approved_training, + ) + ) # Add highest education from EDUCQUAL (highest qualification achieved) # Codes from FRS ADT_324X classification; unmapped codes default to UPPER_SECONDARY diff --git a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py index c4b9cf0d7..4432ee550 100644 --- a/policyengine_uk_data/tests/test_legacy_benefit_proxies.py +++ b/policyengine_uk_data/tests/test_legacy_benefit_proxies.py @@ -8,8 +8,12 @@ attach_legacy_benefit_proxies_from_frs_person, apply_legacy_benefit_proxies, create_frs, + derive_age_started_or_accepted_current_education_or_training, derive_esa_health_condition_proxy, derive_esa_support_group_proxy, + derive_is_before_universal_credit_qualifying_young_person_terminal_date, + derive_is_in_approved_training_from_frs_person, + derive_is_in_non_advanced_education, derive_legacy_jobseeker_proxy, derive_receives_benefits_in_own_right, load_legacy_jobseeker_max_annual_hours, @@ -136,6 +140,60 @@ def test_receives_benefits_in_own_right_uses_reported_adult_benefits(): assert result.tolist() == [False, True, True, True, True, True] +def test_qualifying_young_person_education_inputs_use_current_education(): + result = derive_is_in_non_advanced_education( + np.array( + [ + "NOT_IN_EDUCATION", + "LOWER_SECONDARY", + "UPPER_SECONDARY", + "POST_SECONDARY", + "TERTIARY", + "UPPER_SECONDARY", + ] + ), + is_apprentice=np.array([False, False, False, False, False, True]), + ) + + assert result.tolist() == [False, True, True, True, False, False] + + +def test_approved_training_uses_frs_government_training_codes(): + person = pd.DataFrame({"train": [-1, 0, 1, 2, 9, 10, 13, np.nan]}) + + result = derive_is_in_approved_training_from_frs_person(person) + + assert result.tolist() == [False, False, True, True, True, False, False, False] + + +def test_approved_training_defaults_false_when_frs_field_missing(): + person = pd.DataFrame({"age": [16, 19]}) + + result = derive_is_in_approved_training_from_frs_person(person) + + assert result.tolist() == [False, False] + + +def test_qyp_entry_age_proxy_caps_current_education_or_training_at_18(): + result = derive_age_started_or_accepted_current_education_or_training( + age=np.array([16, 18, 19, 20, 19]), + is_in_non_advanced_education=np.array([True, True, True, True, False]), + is_in_approved_training=np.array([False, False, False, False, False]), + ) + + assert result.tolist() == [16, 18, 18, 18, 1000] + + +def test_uc_terminal_date_proxy_applies_to_19yo_current_qyp_activity_only(): + result = derive_is_before_universal_credit_qualifying_young_person_terminal_date( + age=np.array([18, 19, 19, 19, 20]), + is_in_non_advanced_education=np.array([True, True, False, False, True]), + is_in_approved_training=np.array([False, False, True, False, False]), + ) + + assert result.tolist() == [False, True, True, False, False] + + def test_add_legacy_benefit_proxies_wires_all_three_columns(): pe_person = pd.DataFrame( { @@ -483,8 +541,21 @@ def fake_read_csv(path, *args, **kwargs): "esa_health_condition_proxy", "esa_support_group_proxy", "receives_benefits_in_own_right", + "is_in_non_advanced_education", + "is_in_approved_training", + "age_started_or_accepted_current_education_or_training", + "is_before_universal_credit_qualifying_young_person_terminal_date", "is_parent", }.issubset(dataset.person.columns) assert not dataset.person["is_parent"].iloc[0] + assert not dataset.person["is_in_non_advanced_education"].iloc[0] + assert not dataset.person["is_in_approved_training"].iloc[0] + assert ( + dataset.person["age_started_or_accepted_current_education_or_training"].iloc[0] + == 1000 + ) + assert not dataset.person[ + "is_before_universal_credit_qualifying_young_person_terminal_date" + ].iloc[0] assert dataset.person["education_grants"].iloc[0] == 100 assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0