Skip to content

Commit 5dc1909

Browse files
committed
Derive microsimulation weights from household weights
1 parent d906744 commit 5dc1909

3 files changed

Lines changed: 234 additions & 12 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Derive microsimulation weights from household weights instead of entity-specific weight variables.

policyengine_core/simulations/microsimulation.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,52 @@ class Microsimulation(Simulation):
1616
def get_weights(
    self, variable_name: str, period: Period, map_to: str = None
) -> ArrayLike:
    """Return the weights matching ``variable_name`` for ``period``.

    Weights are always computed from ``household_weight`` and then projected
    onto the target entity: ``map_to`` when it is given, otherwise the
    entity of ``variable_name``.

    Returns ``None`` when the unit of the requested period is neither the
    definition period of ``household_weight`` nor a month while the weight
    is defined yearly.
    """
    requested_period = get_period(period)
    target_variable = self.tax_benefit_system.get_variable(
        variable_name, check_existence=True
    )
    target_entity_key = map_to or target_variable.entity.key
    weight_definition_period = self.tax_benefit_system.get_variable(
        "household_weight", check_existence=True
    ).definition_period

    if requested_period.unit == weight_definition_period:
        lookup_period = requested_period
    elif requested_period.unit == MONTH and weight_definition_period == YEAR:
        # Common use-case: a monthly request against yearly weights reads
        # the weights of the enclosing year. To-do: implement others if
        # needed.
        lookup_period = requested_period.this_year
    else:
        # Unsupported combination of period units: no weights available.
        return None

    household_level_weights = self.calculate(
        "household_weight",
        lookup_period,
        use_weights=False,
    )
    return self._project_household_weights(
        household_level_weights, target_entity_key
    )
47+
48+
def _project_household_weights(
    self,
    household_weights: ArrayLike,
    entity_key: str,
) -> ArrayLike:
    """Map household-level weights onto the entity named ``entity_key``.

    * The entity of ``household_weight`` itself: weights are returned
      unchanged.
    * The person entity: the household population's ``project`` result.
    * Any other entity: that population's ``value_from_first_person``
      applied to the projected weights.
    """
    system = self.tax_benefit_system
    household_key = system.get_variable(
        "household_weight", check_existence=True
    ).entity.key
    if entity_key == household_key:
        return household_weights

    weights_per_person = self.populations[household_key].project(
        household_weights
    )
    if entity_key != system.person_entity.key:
        target_population = self.populations[entity_key]
        return target_population.value_from_first_person(weights_per_person)
    return weights_per_person
3965

4066
def calculate(
4167
self,
@@ -95,5 +121,5 @@ def calculate_dataframe(
95121
values = super().calculate_dataframe(variable_names, period, map_to)
96122
if not use_weights:
97123
return values
98-
weights = self.get_weights(variable_names[0], period)
124+
weights = self.get_weights(variable_names[0], period, map_to)
99125
return MicroDataFrame(values, weights=weights)
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
from policyengine_core.country_template import Microsimulation
5+
from policyengine_core.data import Dataset
6+
from policyengine_core.entities import build_entity
7+
from policyengine_core.model_api import ETERNITY, MONTH, YEAR, Variable
8+
from policyengine_core.simulations import Microsimulation as CoreMicrosimulation
9+
from policyengine_core.taxbenefitsystems import TaxBenefitSystem
10+
11+
12+
def _weighted_dataset(include_person_weight: bool = True) -> Dataset:
    """Build a 2022 dataset of three persons spread over two households.

    The ``household_weight`` column holds 10 for the first household's rows
    and 20 for the second's. When ``include_person_weight`` is true, a
    ``person_weight`` column with different values (1, 2, 3) is added as
    well.
    """
    columns = {
        "person_id__2022": [0, 1, 2],
        "person_household_id__2022": [0, 0, 1],
        "person_household_role__2022": ["parent", "child", "parent"],
        "household_weight__2022": [10.0, 10.0, 20.0],
        "salary__2022-01": [100.0, 200.0, 300.0],
    }
    if include_person_weight:
        columns["person_weight__2022"] = [1.0, 2.0, 3.0]
    frame = pd.DataFrame(columns)
    return Dataset.from_dataframe(frame, "2022")
23+
24+
25+
def test__given_stale_person_weight__then_get_weights_uses_household_weight():
    """Weights follow ``household_weight`` even if ``person_weight`` is set."""
    # Given: a dataset whose person_weight column disagrees with the
    # household weights
    sim = Microsimulation(dataset=_weighted_dataset())

    # When
    person_level_weights = sim.get_weights("salary", "2022-01")

    # Then: every person carries the weight of their household
    expected = np.array([10.0, 10.0, 20.0])
    np.testing.assert_array_equal(person_level_weights, expected)
34+
35+
36+
def test__given_no_person_weight_variable__then_get_weights_uses_household_weight(
    isolated_tax_benefit_system,
):
    """Weights are still produced when ``person_weight`` does not exist."""
    # Given: a tax-benefit system without the person_weight variable and a
    # dataset without the matching column
    del isolated_tax_benefit_system.variables["person_weight"]
    dataset = _weighted_dataset(include_person_weight=False)
    sim = Microsimulation(
        tax_benefit_system=isolated_tax_benefit_system,
        dataset=dataset,
    )

    # When
    person_level_weights = sim.get_weights("salary", "2022-01")

    # Then: every person carries the weight of their household
    expected = np.array([10.0, 10.0, 20.0])
    np.testing.assert_array_equal(person_level_weights, expected)
51+
52+
53+
def test__given_dataframe_mapped_to_household__then_weights_are_household_weights():
    """``calculate_dataframe`` weights match the ``map_to`` entity."""
    # Given
    sim = Microsimulation(dataset=_weighted_dataset())

    # When: a person-level variable is mapped to households
    household_frame = sim.calculate_dataframe(
        ["salary"], "2022-01", map_to="household"
    )

    # Then: one weight per household, not one per person
    expected = np.array([10.0, 20.0])
    np.testing.assert_array_equal(household_frame.weights, expected)
66+
67+
68+
# Entities of a minimal tax-benefit system with two group entities (family
# and household) in addition to persons. Each group entity declares a single
# "member" role.
Family = build_entity(
    key="family",
    plural="families",
    label="Family",
    roles=[
        dict(key="member", plural="members", label="Members"),
    ],
)

Household = build_entity(
    key="household",
    plural="households",
    label="Household",
    roles=[
        dict(key="member", plural="members", label="Members"),
    ],
)

Person = build_entity(
    key="person",
    plural="persons",
    label="Person",
    is_person=True,
)
88+
89+
90+
class FamilyWeightTaxBenefitSystem(TaxBenefitSystem):
    """Tax-benefit system exposing family, household and person entities."""

    entities = [
        Family,
        Household,
        Person,
    ]
92+
93+
94+
class person_id(Variable):
    # Integer identifier of a person; defined once for all periods.
    label = "Person ID"
    entity = Person
    value_type = int
    definition_period = ETERNITY
99+
100+
101+
class person_family_id(Variable):
    # Integer family identifier stored per person; defined once for all
    # periods.
    label = "Person family ID"
    entity = Person
    value_type = int
    definition_period = ETERNITY
106+
107+
108+
class person_family_role(Variable):
    # Name of the role a person holds in their family, stored as a string;
    # defined once for all periods.
    label = "Person family role"
    entity = Person
    value_type = str
    definition_period = ETERNITY
113+
114+
115+
class person_household_id(Variable):
    # Integer household identifier stored per person; defined once for all
    # periods.
    label = "Person household ID"
    entity = Person
    value_type = int
    definition_period = ETERNITY
120+
121+
122+
class person_household_role(Variable):
    # Name of the role a person holds in their household, stored as a
    # string; defined once for all periods.
    label = "Person household role"
    entity = Person
    value_type = str
    definition_period = ETERNITY
127+
128+
129+
class household_weight(Variable):
    # Yearly, household-level float weight; the variable that
    # Microsimulation.get_weights reads by name.
    label = "Household weight"
    entity = Household
    value_type = float
    definition_period = YEAR
134+
135+
136+
class family_income_for_weight_test(Variable):
    # Monthly, family-level float variable used as the weight target in the
    # non-household group entity test.
    label = "Family income for weight tests"
    entity = Family
    value_type = float
    definition_period = MONTH
141+
142+
143+
def _family_weight_tax_benefit_system():
    """Return a new ``FamilyWeightTaxBenefitSystem`` with its test variables."""
    system = FamilyWeightTaxBenefitSystem()
    variable_classes = [
        person_id,
        person_family_id,
        person_family_role,
        person_household_id,
        person_household_role,
        household_weight,
        family_income_for_weight_test,
    ]
    # Registration order is kept identical to the declaration order above.
    for variable_class in variable_classes:
        system.add_variable(variable_class)
    return system
156+
157+
158+
def _family_weight_dataset() -> Dataset:
    """Build a 2022 dataset of three persons, two families, one household.

    All three persons share household 0 (weight 50). Persons 0 and 1 belong
    to family 0 and person 2 to family 1.
    """
    columns = {
        "person_id__2022": [0, 1, 2],
        "person_family_id__2022": [0, 0, 1],
        "person_family_role__2022": ["member", "member", "member"],
        "person_household_id__2022": [0, 0, 0],
        "person_household_role__2022": ["member", "member", "member"],
        "household_weight__2022": [50.0, 50.0, 50.0],
        "family_income_for_weight_test__2022-01": [100.0, 100.0, 200.0],
    }
    frame = pd.DataFrame(columns)
    return Dataset.from_dataframe(frame, "2022")
173+
174+
175+
def test__given_non_household_group_entity__then_weights_are_not_summed_by_members():
    """Each family gets the household weight once, not once per member."""
    # Given: two families living in a single household of weight 50
    sim = CoreMicrosimulation(
        tax_benefit_system=_family_weight_tax_benefit_system(),
        dataset=_family_weight_dataset(),
    )
    variable_name = "family_income_for_weight_test"

    # When: weights are requested directly and through a mapped dataframe
    family_weights = sim.get_weights(variable_name, "2022-01")
    family_frame = sim.calculate_dataframe(
        [variable_name], "2022-01", map_to="family"
    )

    # Then: the two-member family is weighted 50, not 100
    expected = np.array([50.0, 50.0])
    np.testing.assert_array_equal(family_weights, expected)
    np.testing.assert_array_equal(family_frame.weights, expected)

0 commit comments

Comments
 (0)