Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/73.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assign `is_parent` from FRS adult-table membership and benefit-unit dependent-child counts (#73).
47 changes: 47 additions & 0 deletions policyengine_uk_data/datasets/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,36 @@ def attach_legacy_benefit_proxies_from_frs_person(
)


def derive_is_parent_from_frs_microdata(
    person_ids,
    person_benunit_ids,
    adult_person_ids,
    benunit_ids,
    dependent_children,
) -> np.ndarray:
    """Identify FRS adults in benefit units with dependent children.

    FRS benefit units contain either one adult or a couple plus any dependent
    children. Using the raw adult table and benefit-unit dependent-child count
    avoids ranking adults across the whole household when multiple benefit
    units share a household.

    Args:
        person_ids: One identifier per person row.
        person_benunit_ids: Benefit-unit identifier for each person row,
            aligned with ``person_ids``.
        adult_person_ids: Identifiers of the people present in the adult
            table.
        benunit_ids: One identifier per benefit-unit row.
        dependent_children: Dependent-child count per benefit-unit row,
            aligned with ``benunit_ids``. Missing (NaN) counts are treated
            as zero.

    Returns:
        Boolean array aligned with ``person_ids``: ``True`` where the person
        is in the adult table and their benefit unit has a positive
        dependent-child count. People whose benefit unit is absent from
        ``benunit_ids`` get ``False``.

    Raises:
        ValueError: If ``benunit_ids`` and ``dependent_children`` do not have
            the same shape.
    """

    child_counts = np.asarray(dependent_children, dtype=float)
    unit_ids = np.asarray(benunit_ids)
    if child_counts.shape != unit_ids.shape:
        raise ValueError(
            "benunit_ids and dependent_children must have the same shape; "
            f"got {unit_ids.shape} and {child_counts.shape}"
        )

    # A set-membership test (rather than a keyed lookup) keeps this working
    # when a benefit-unit ID is repeated: the unit counts as having children
    # if any of its rows reports a positive count.
    units_with_children = unit_ids[np.nan_to_num(child_counts, nan=0.0) > 0]
    has_dependent_children = np.isin(
        np.asarray(person_benunit_ids), units_with_children
    )
    is_adult_record = np.isin(np.asarray(person_ids), np.asarray(adult_person_ids))
    return is_adult_record & has_dependent_children


def _as_non_negative_array(values) -> np.ndarray:
    """Coerce ``values`` to floats, replacing NaN with 0 and flooring at 0."""
    as_float = np.asarray(values, dtype=float)
    without_nan = np.nan_to_num(as_float, nan=0.0)
    # Negative entries are raised to zero; non-negative entries pass through.
    return np.maximum(without_nan, 0.0)
Expand Down Expand Up @@ -443,6 +473,23 @@ def create_frs(
pe_person["hours_worked"] = np.maximum(person.tothours, 0) * 52
pe_person["is_household_head"] = person.hrpid == 1
pe_person["is_benunit_head"] = person.uperson == 1
dependent_children = (
benunit.depchldb
if "depchldb" in benunit
else frs["child"]
.groupby("benunit_id")
.size()
.reindex(benunit.benunit_id)
.fillna(0)
.to_numpy()
)
pe_person["is_parent"] = derive_is_parent_from_frs_microdata(
person_ids=pe_person.person_id,
person_benunit_ids=pe_person.person_benunit_id,
adult_person_ids=frs["adult"].person_id,
benunit_ids=pe_benunit.benunit_id,
dependent_children=dependent_children,
)
MARITAL = [
"MARRIED",
"SINGLE",
Expand Down
27 changes: 27 additions & 0 deletions policyengine_uk_data/tests/test_is_parent_from_frs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np

from policyengine_uk_data.datasets.frs import derive_is_parent_from_frs_microdata


def test_is_parent_uses_benefit_unit_not_household_rank():
    """Only the adult in the benefit unit that has a child is a parent."""
    # Benefit unit 101 holds two adults and no dependent children; unit 102
    # holds one adult (1_003) and one non-adult (1_004) with one child count.
    people = np.array([1_001, 1_002, 1_003, 1_004])
    people_benunits = np.array([101, 101, 102, 102])
    adults = np.array([1_001, 1_002, 1_003])
    units = np.array([101, 102])
    children_per_unit = np.array([0, 1])

    result = derive_is_parent_from_frs_microdata(
        person_ids=people,
        person_benunit_ids=people_benunits,
        adult_person_ids=adults,
        benunit_ids=units,
        dependent_children=children_per_unit,
    )

    expected = [False, False, True, False]
    assert result.tolist() == expected


def test_is_parent_marks_both_adults_in_couple_with_children():
    """Both adults of a couple with a dependent child are parents."""
    # One benefit unit (201): two adults plus a third person who is not in
    # the adult table.
    people = np.array([2_001, 2_002, 2_003])
    people_benunits = np.array([201, 201, 201])
    adults = np.array([2_001, 2_002])
    units = np.array([201])
    children_per_unit = np.array([1])

    result = derive_is_parent_from_frs_microdata(
        person_ids=people,
        person_benunit_ids=people_benunits,
        adult_person_ids=adults,
        benunit_ids=units,
        dependent_children=children_per_unit,
    )

    expected = [True, True, False]
    assert result.tolist() == expected
2 changes: 2 additions & 0 deletions policyengine_uk_data/tests/test_legacy_benefit_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ def fake_read_csv(path, *args, **kwargs):
"legacy_jobseeker_proxy",
"esa_health_condition_proxy",
"esa_support_group_proxy",
"is_parent",
}.issubset(dataset.person.columns)
assert not dataset.person["is_parent"].iloc[0]
assert dataset.person["education_grants"].iloc[0] == 100
assert dataset.person["disabled_students_allowance_eligible_expenses"].iloc[0] == 0
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading