# Reported-benefit columns whose receipt counts as "benefits in own right".
BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS = (
    "universal_credit_reported",
    "jsa_contrib_reported",
    "jsa_income_reported",
    "esa_contrib_reported",
    "esa_income_reported",
)
# PolicyEngine education states treated as non-advanced education.
NON_ADVANCED_EDUCATION_LEVELS = (
    "PRE_PRIMARY",
    "PRIMARY",
    "LOWER_SECONDARY",
    "UPPER_SECONDARY",
    "POST_SECONDARY",
)
# FRS government-training question variants use 10 or 13 for "None of these".
# NOTE(review): in the variant where 13 means "None of these", codes 10-12 are
# presumably real schemes and are excluded here - confirm against the FRS
# documentation for each survey year.
FRS_APPROVED_TRAINING_CODES = tuple(range(1, 10))
# Sentinel entry age written for people not in qualifying education/training.
UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE = 1000


def _as_bool_array(values) -> np.ndarray:
    """Coerce flag-like input to a boolean array, mapping missing values to False.

    Boolean input is returned unchanged. Numeric or object input (for example
    a 0/1 column, or a column that picked up NaN) is converted element-wise so
    that the callers' ``~``, ``&`` and ``|`` are logical rather than bitwise.
    """

    flags = np.asarray(values)
    if flags.dtype == bool:
        return flags
    return np.where(pd.isna(flags), False, flags).astype(bool)


def _in_qualifying_education_or_training(
    is_in_non_advanced_education,
    is_in_approved_training,
) -> np.ndarray:
    """Return True where either qualifying-activity flag is set."""

    return _as_bool_array(is_in_non_advanced_education) | _as_bool_array(
        is_in_approved_training
    )


def derive_receives_benefits_in_own_right(pe_person: pd.DataFrame) -> pd.Series:
    """Identify people reporting adult benefits that end QYP status.

    Args:
        pe_person: Person table containing every column named in
            ``BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS``.

    Returns:
        Boolean Series on ``pe_person.index``; True where at least one of the
        reported-benefit columns is strictly positive.

    Raises:
        KeyError: If any of the reported-benefit columns is missing.
    """

    reported = pe_person[list(BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS)]
    # Test each column separately instead of the row sum, so a negative value
    # in one column cannot cancel a positive amount in another. NaN compares
    # as not-greater-than-zero, so missing amounts count as no receipt.
    return reported.gt(0).any(axis=1)


def derive_is_in_non_advanced_education(
    current_education,
    is_apprentice=None,
) -> np.ndarray:
    """Identify current non-advanced education from PolicyEngine education states.

    Args:
        current_education: Education state label(s); compared against
            ``NON_ADVANCED_EDUCATION_LEVELS``.
        is_apprentice: Optional flag(s) matching ``current_education``;
            apprentices are never counted. Non-boolean flags are coerced to
            bool and missing flags are treated as False.

    Returns:
        Boolean array with the shape of ``current_education``.
    """

    current_education = np.asarray(current_education)
    in_non_advanced = np.isin(current_education, NON_ADVANCED_EDUCATION_LEVELS)
    if is_apprentice is None:
        # Nobody is an apprentice by default, so no mask is needed (and no
        # len() call, which would fail for scalar input).
        return in_non_advanced

    return in_non_advanced & ~_as_bool_array(is_apprentice)


def derive_is_in_approved_training_from_frs_person(
    person: pd.DataFrame,
) -> pd.Series:
    """Identify reported government training scheme participation in FRS.

    Args:
        person: FRS person table; the ``train`` column is read when present.

    Returns:
        Boolean Series on ``person.index``. All False when ``train`` is
        absent; otherwise True where ``train`` is one of
        ``FRS_APPROVED_TRAINING_CODES``.
    """

    if "train" not in person.columns:
        # Explicit dtype keeps the result boolean even for an empty table.
        return pd.Series(False, index=person.index, dtype=bool)

    # Unparseable or missing answers become 0, which is not an approved code.
    train = pd.to_numeric(person["train"], errors="coerce").fillna(0)
    return train.isin(FRS_APPROVED_TRAINING_CODES)


def derive_age_started_or_accepted_current_education_or_training(
    age,
    is_in_non_advanced_education,
    is_in_approved_training,
) -> np.ndarray:
    """Approximate the entry age for current QYP education or training.

    FRS observes current education/training status but not the age at which the
    current course or programme was started, enrolled on, or accepted. For
    people currently in qualifying education/training, cap the imputed entry
    age at 18 so observed 19-year-olds remain eligible for rules requiring
    entry before age 19.

    Returns:
        Array holding ``min(age, 18)`` for people in qualifying education or
        training and ``UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE``
        for everyone else.
    """

    age = np.asarray(age)
    in_qualifying_education_or_training = _in_qualifying_education_or_training(
        is_in_non_advanced_education,
        is_in_approved_training,
    )

    return np.where(
        in_qualifying_education_or_training,
        np.minimum(age, 18),
        UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE,
    )


def derive_is_before_universal_credit_qualifying_young_person_terminal_date(
    age,
    is_in_non_advanced_education,
    is_in_approved_training,
) -> np.ndarray:
    """Approximate the UC terminal-date condition for observed 19-year-olds.

    FRS does not expose the date-of-birth and assessment-period detail needed
    to identify the exact 1 September terminal date. Use current qualifying
    education/training status as the microdata proxy for age-19 records.

    Returns:
        Boolean array that is True only where ``age == 19`` and the person is
        in qualifying education or training; every other age yields False.
    """

    age = np.asarray(age)
    in_qualifying_education_or_training = _in_qualifying_education_or_training(
        is_in_non_advanced_education,
        is_in_approved_training,
    )

    return (age == 19) & in_qualifying_education_or_training
def test_receives_benefits_in_own_right_uses_reported_adult_benefits():
    """Any single reported adult benefit flags the person; none does not."""
    benefit_columns = [
        "universal_credit_reported",
        "jsa_contrib_reported",
        "jsa_income_reported",
        "esa_contrib_reported",
        "esa_income_reported",
    ]
    n_benefits = len(benefit_columns)
    # Row 0 reports nothing; row i + 1 reports only benefit_columns[i].
    rows = [[0] * n_benefits]
    for reported in range(n_benefits):
        rows.append([int(column == reported) for column in range(n_benefits)])
    pe_person = pd.DataFrame(rows, columns=benefit_columns)

    flags = derive_receives_benefits_in_own_right(pe_person)

    assert flags.tolist() == [False] + [True] * n_benefits


def test_qualifying_young_person_education_inputs_use_current_education():
    """Only non-advanced levels count, and apprentices are always excluded."""
    # (education state, is apprentice, expected flag)
    cases = [
        ("NOT_IN_EDUCATION", False, False),
        ("LOWER_SECONDARY", False, True),
        ("UPPER_SECONDARY", False, True),
        ("POST_SECONDARY", False, True),
        ("TERTIARY", False, False),
        ("UPPER_SECONDARY", True, False),
    ]
    levels = np.array([level for level, _, _ in cases])
    apprentices = np.array([apprentice for _, apprentice, _ in cases])
    expected = [flag for _, _, flag in cases]

    flags = derive_is_in_non_advanced_education(levels, is_apprentice=apprentices)

    assert flags.tolist() == expected


def test_approved_training_uses_frs_government_training_codes():
    """Codes 1-9 count as approved training; everything else does not."""
    # (FRS train code, expected flag)
    cases = [
        (-1, False),
        (0, False),
        (1, True),
        (2, True),
        (9, True),
        (10, False),
        (13, False),
        (np.nan, False),
    ]
    person = pd.DataFrame({"train": [code for code, _ in cases]})

    flags = derive_is_in_approved_training_from_frs_person(person)

    assert flags.tolist() == [expected for _, expected in cases]


def test_approved_training_defaults_false_when_frs_field_missing():
    """Without a train column nobody is flagged as in approved training."""
    ages = [16, 19]
    person = pd.DataFrame({"age": ages})

    flags = derive_is_in_approved_training_from_frs_person(person)

    assert flags.tolist() == [False] * len(ages)


def test_qyp_entry_age_proxy_caps_current_education_or_training_at_18():
    """Entry age is min(age, 18) in qualifying activity, else the sentinel."""
    ages = np.array([16, 18, 19, 20, 19])
    in_education = np.array([True, True, True, True, False])
    in_training = np.array([False, False, False, False, False])

    entry_ages = derive_age_started_or_accepted_current_education_or_training(
        age=ages,
        is_in_non_advanced_education=in_education,
        is_in_approved_training=in_training,
    )

    assert entry_ages.tolist() == [16, 18, 18, 18, 1000]


def test_uc_terminal_date_proxy_applies_to_19yo_current_qyp_activity_only():
    """Only 19-year-olds in qualifying education or training are flagged."""
    ages = np.array([18, 19, 19, 19, 20])
    in_education = np.array([True, True, False, False, True])
    in_training = np.array([False, False, True, False, False])

    flags = derive_is_before_universal_credit_qualifying_young_person_terminal_date(
        age=ages,
        is_in_non_advanced_education=in_education,
        is_in_approved_training=in_training,
    )

    assert flags.tolist() == [False, True, True, False, False]
"is_before_universal_credit_qualifying_young_person_terminal_date", "is_parent", }.issubset(dataset.person.columns) assert not dataset.person["is_parent"].iloc[0] + assert not dataset.person["is_in_non_advanced_education"].iloc[0] + assert not dataset.person["is_in_approved_training"].iloc[0] + assert ( + dataset.person["age_started_or_accepted_current_education_or_training"].iloc[0] + == 1000 + ) + assert not dataset.person[ + "is_before_universal_credit_qualifying_young_person_terminal_date" + ].iloc[0] assert dataset.person["education_grants"].iloc[0] == 100 assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0