Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/codex-frs-own-right-benefits.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added FRS person-level child and qualifying young person inputs for reported adult benefit receipt, current non-advanced education, approved training, education/training entry age, and the Universal Credit terminal-date proxy.
122 changes: 122 additions & 0 deletions policyengine_uk_data/datasets/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,23 @@
"disabled_students_allowance_course_eligible",
"disabled_students_allowance_has_qualifying_condition",
)
# Reported-benefit columns summed by derive_receives_benefits_in_own_right;
# a strictly positive total flags the person as receiving an adult benefit.
BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS = (
    "universal_credit_reported",
    "jsa_contrib_reported",
    "jsa_income_reported",
    "esa_contrib_reported",
    "esa_income_reported",
)

# PolicyEngine education states that count as non-advanced education.
NON_ADVANCED_EDUCATION_LEVELS = (
    "PRE_PRIMARY",
    "PRIMARY",
    "LOWER_SECONDARY",
    "UPPER_SECONDARY",
    "POST_SECONDARY",
)

# FRS government-training question variants use 10 or 13 for "None of these",
# so only codes 1 through 9 are treated as approved training.
FRS_APPROVED_TRAINING_CODES = (1, 2, 3, 4, 5, 6, 7, 8, 9)

# Sentinel entry age assigned to people who are not currently in qualifying
# education or training.
UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE = 1000


@lru_cache(maxsize=None)
Expand Down Expand Up @@ -166,6 +183,88 @@ def derive_esa_support_group_proxy(
)


def derive_receives_benefits_in_own_right(pe_person: pd.DataFrame) -> pd.Series:
    """Flag people who report any adult benefit that ends QYP status.

    Missing amounts are counted as zero. A person is flagged when the
    row-wise total across ``BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS`` is
    strictly positive.
    """

    reported_amounts = pe_person[list(BENEFITS_IN_OWN_RIGHT_REPORTED_COLUMNS)]
    total_reported = reported_amounts.fillna(0).sum(axis=1)
    return total_reported > 0


def derive_is_in_non_advanced_education(
    current_education,
    is_apprentice=None,
) -> np.ndarray:
    """Identify current non-advanced education from PolicyEngine education states.

    Args:
        current_education: Array-like of education state names, one per person.
        is_apprentice: Optional array-like of apprenticeship flags aligned with
            ``current_education``. Values are interpreted by truthiness (so a
            float NaN counts as True). When omitted, nobody is treated as an
            apprentice.

    Returns:
        Boolean array that is True where the education state is one of
        ``NON_ADVANCED_EDUCATION_LEVELS`` and the person is not an apprentice.
    """

    current_education = np.asarray(current_education)
    if is_apprentice is None:
        # Use the full shape so the default mask always broadcasts cleanly.
        is_apprentice = np.zeros(current_education.shape, dtype=bool)
    else:
        # Coerce to bool so that `~` below is a logical negation. On integer
        # or object-dtype flags `~` is a bitwise inversion, which would make
        # the result an integer/object array instead of a boolean mask.
        is_apprentice = np.asarray(is_apprentice, dtype=bool)

    return np.isin(current_education, NON_ADVANCED_EDUCATION_LEVELS) & ~is_apprentice


def derive_is_in_approved_training_from_frs_person(
    person: pd.DataFrame,
) -> pd.Series:
    """Flag FRS respondents who report a government training scheme.

    When the ``train`` column is absent, an all-False series aligned with
    ``person`` is returned. Otherwise ``train`` is coerced to numeric, with
    unparseable or missing values replaced by 0, and matched against
    ``FRS_APPROVED_TRAINING_CODES``.
    """

    if "train" in person.columns:
        training_codes = pd.to_numeric(person.train, errors="coerce")
        return training_codes.fillna(0).isin(FRS_APPROVED_TRAINING_CODES)

    return pd.Series(False, index=person.index)


def derive_age_started_or_accepted_current_education_or_training(
    age,
    is_in_non_advanced_education,
    is_in_approved_training,
) -> np.ndarray:
    """Impute the age at which current QYP education or training began.

    FRS records whether someone is currently in education or training, but
    not how old they were when the current course or programme was started,
    enrolled on, or accepted. For people currently in qualifying education or
    training, the imputed entry age is their current age capped at 18, so
    observed 19-year-olds remain eligible for rules requiring entry before
    age 19. Everyone else receives
    ``UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE``.
    """

    current_age = np.asarray(age)
    education_mask = np.asarray(is_in_non_advanced_education)
    training_mask = np.asarray(is_in_approved_training)
    qualifies = education_mask | training_mask

    capped_entry_age = np.minimum(current_age, 18)
    return np.where(
        qualifies,
        capped_entry_age,
        UNKNOWN_QUALIFYING_EDUCATION_OR_TRAINING_ENTRY_AGE,
    )


def derive_is_before_universal_credit_qualifying_young_person_terminal_date(
    age,
    is_in_non_advanced_education,
    is_in_approved_training,
) -> np.ndarray:
    """Proxy the UC terminal-date condition for observed 19-year-olds.

    FRS lacks the date-of-birth and assessment-period detail needed to place
    a person relative to the exact 1 September terminal date. As a microdata
    proxy, the flag is True only for records aged exactly 19 that are
    currently in non-advanced education or approved training; every other
    record is False.
    """

    is_nineteen = np.asarray(age) == 19
    education_mask = np.asarray(is_in_non_advanced_education)
    training_mask = np.asarray(is_in_approved_training)

    return is_nineteen & (education_mask | training_mask)


def add_legacy_benefit_proxies(
pe_person: pd.DataFrame,
employment_status_reported,
Expand Down Expand Up @@ -558,6 +657,26 @@ def determine_education_level(fted_val, typeed2_val, age_val):
[determine_education_level(f, t, a) for f, t, a in zip(fted, typeed2, age)],
index=pe_person.index,
)
pe_person["is_in_non_advanced_education"] = derive_is_in_non_advanced_education(
pe_person.current_education
)
pe_person["is_in_approved_training"] = (
derive_is_in_approved_training_from_frs_person(person)
)
pe_person["age_started_or_accepted_current_education_or_training"] = (
derive_age_started_or_accepted_current_education_or_training(
age,
pe_person.is_in_non_advanced_education,
pe_person.is_in_approved_training,
)
)
pe_person["is_before_universal_credit_qualifying_young_person_terminal_date"] = (
derive_is_before_universal_credit_qualifying_young_person_terminal_date(
age,
pe_person.is_in_non_advanced_education,
pe_person.is_in_approved_training,
)
)

# Add highest education from EDUCQUAL (highest qualification achieved)
# Codes from FRS ADT_324X classification; unmapped codes default to UPPER_SECONDARY
Expand Down Expand Up @@ -923,6 +1042,9 @@ def determine_education_level(fted_val, typeed2_val, age_val):
)
* WEEKS_IN_YEAR
)
pe_person["receives_benefits_in_own_right"] = derive_receives_benefits_in_own_right(
pe_person
)

pe_person["bsp_reported"] = (
sum_to_entity(
Expand Down
89 changes: 89 additions & 0 deletions policyengine_uk_data/tests/test_legacy_benefit_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@
attach_legacy_benefit_proxies_from_frs_person,
apply_legacy_benefit_proxies,
create_frs,
derive_age_started_or_accepted_current_education_or_training,
derive_esa_health_condition_proxy,
derive_esa_support_group_proxy,
derive_is_before_universal_credit_qualifying_young_person_terminal_date,
derive_is_in_approved_training_from_frs_person,
derive_is_in_non_advanced_education,
derive_legacy_jobseeker_proxy,
derive_receives_benefits_in_own_right,
load_legacy_jobseeker_max_annual_hours,
)

Expand Down Expand Up @@ -119,6 +124,76 @@ def test_esa_support_group_proxy_is_stricter_subset_of_health_proxy():
assert result.tolist() == [True, False, False, False, False]


def test_receives_benefits_in_own_right_uses_reported_adult_benefits():
    """Each reported adult benefit on its own is enough to set the flag."""
    reported_benefits = {
        "universal_credit_reported": [0, 1, 0, 0, 0, 0],
        "jsa_contrib_reported": [0, 0, 1, 0, 0, 0],
        "jsa_income_reported": [0, 0, 0, 1, 0, 0],
        "esa_contrib_reported": [0, 0, 0, 0, 1, 0],
        "esa_income_reported": [0, 0, 0, 0, 0, 1],
    }
    frame = pd.DataFrame(reported_benefits)

    flags = derive_receives_benefits_in_own_right(frame).tolist()

    # Row 0 reports nothing; rows 1-5 each report exactly one benefit.
    assert flags == [False, True, True, True, True, True]


def test_qualifying_young_person_education_inputs_use_current_education():
    """Non-advanced states qualify unless the person is an apprentice."""
    education_states = np.array(
        [
            "NOT_IN_EDUCATION",
            "LOWER_SECONDARY",
            "UPPER_SECONDARY",
            "POST_SECONDARY",
            "TERTIARY",
            "UPPER_SECONDARY",
        ]
    )
    apprentice_flags = np.array([False, False, False, False, False, True])

    flags = derive_is_in_non_advanced_education(
        education_states,
        is_apprentice=apprentice_flags,
    )

    # The final UPPER_SECONDARY record is excluded because it is an apprentice.
    assert flags.tolist() == [False, True, True, True, False, False]


def test_approved_training_uses_frs_government_training_codes():
    """Only training codes 1-9 count; other codes and NaN do not."""
    raw_codes = [-1, 0, 1, 2, 9, 10, 13, np.nan]
    frs_person = pd.DataFrame({"train": raw_codes})

    flags = derive_is_in_approved_training_from_frs_person(frs_person).tolist()

    assert flags == [False, False, True, True, True, False, False, False]


def test_approved_training_defaults_false_when_frs_field_missing():
    """Without a ``train`` column every person is flagged False."""
    frs_person = pd.DataFrame({"age": [16, 19]})

    flags = derive_is_in_approved_training_from_frs_person(frs_person).tolist()

    assert flags == [False, False]


def test_qyp_entry_age_proxy_caps_current_education_or_training_at_18():
    """Entry age is capped at 18 in education and is 1000 otherwise."""
    ages = np.array([16, 18, 19, 20, 19])
    in_education = np.array([True, True, True, True, False])
    in_training = np.array([False, False, False, False, False])

    entry_ages = derive_age_started_or_accepted_current_education_or_training(
        age=ages,
        is_in_non_advanced_education=in_education,
        is_in_approved_training=in_training,
    )

    # The last record is in neither activity, so it gets the unknown sentinel.
    assert entry_ages.tolist() == [16, 18, 18, 18, 1000]


def test_uc_terminal_date_proxy_applies_to_19yo_current_qyp_activity_only():
    """Only 19-year-olds in education or approved training are flagged."""
    ages = np.array([18, 19, 19, 19, 20])
    in_education = np.array([True, True, False, False, True])
    in_training = np.array([False, False, True, False, False])

    flags = derive_is_before_universal_credit_qualifying_young_person_terminal_date(
        age=ages,
        is_in_non_advanced_education=in_education,
        is_in_approved_training=in_training,
    )

    assert flags.tolist() == [False, True, True, False, False]


def test_add_legacy_benefit_proxies_wires_all_three_columns():
pe_person = pd.DataFrame(
{
Expand Down Expand Up @@ -465,8 +540,22 @@ def fake_read_csv(path, *args, **kwargs):
"legacy_jobseeker_proxy",
"esa_health_condition_proxy",
"esa_support_group_proxy",
"receives_benefits_in_own_right",
"is_in_non_advanced_education",
"is_in_approved_training",
"age_started_or_accepted_current_education_or_training",
"is_before_universal_credit_qualifying_young_person_terminal_date",
"is_parent",
}.issubset(dataset.person.columns)
assert not dataset.person["is_parent"].iloc[0]
assert not dataset.person["is_in_non_advanced_education"].iloc[0]
assert not dataset.person["is_in_approved_training"].iloc[0]
assert (
dataset.person["age_started_or_accepted_current_education_or_training"].iloc[0]
== 1000
)
assert not dataset.person[
"is_before_universal_credit_qualifying_young_person_terminal_date"
].iloc[0]
assert dataset.person["education_grants"].iloc[0] == 100
assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0