Skip to content

Commit 78c02f1

Browse files
committed
Fix CPS tip imputation build path
1 parent 3993c5c commit 78c02f1

2 files changed

Lines changed: 83 additions & 3 deletions

File tree

policyengine_us_data/datasets/cps/cps.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1792,6 +1792,9 @@ def add_tips(self, cps: h5py.File):
17921792
raw_data = self.raw_cps(require=True).load()
17931793
raw_person = raw_data["person"]
17941794
cps["is_married"] = raw_person.A_MARITL.isin([1, 2]).values
1795+
cps["is_tipped_occupation"] = derive_is_tipped_occupation(
1796+
derive_treasury_tipped_occupation_code(raw_person.PEIOOCC)
1797+
)
17951798
raw_data.close()
17961799

17971800
cps["is_under_18"] = cps.age < 18
@@ -1809,9 +1812,6 @@ def add_tips(self, cps: h5py.File):
18091812
.values
18101813
)
18111814
cps = pd.DataFrame(cps)
1812-
cps["is_tipped_occupation"] = derive_is_tipped_occupation(
1813-
cps["treasury_tipped_occupation_code"]
1814-
)
18151815

18161816
# Impute tips
18171817

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,80 @@
1+
import pandas as pd
2+
3+
4+
def test_add_tips_derives_tipped_status_from_raw_cps(monkeypatch):
    """Check that ``add_tips`` feeds a raw-CPS-derived tipped flag to the model.

    Every collaborator is replaced by a two-person stub: the raw CPS loader,
    ``policyengine_us.Microsimulation`` and both SIPP model factories.  The
    tip-model stub asserts on the ``is_tipped_occupation`` column it receives
    (first person flagged, second not), and the final assertion checks the
    ``tip_income`` values handed to ``save_dataset``.
    """
    import policyengine_us
    import policyengine_us_data.datasets.sipp as sipp_module
    from policyengine_us_data.datasets.cps.cps import add_tips

    class StubRawData:
        """Raw CPS stand-in that exposes only a ``person`` table."""

        def __init__(self):
            person_columns = {
                "A_MARITL": [1, 3],
                "PEIOOCC": [4040, 9999],
            }
            self.person = pd.DataFrame(person_columns)

        def __getitem__(self, key):
            # Only the person table exists; anything else is a lookup error.
            if key != "person":
                raise KeyError(key)
            return self.person

        def close(self):
            """Nothing to release for the in-memory stub."""

    class StubRawCPS:
        """Callable mimicking ``raw_cps(require=True).load()``."""

        def __call__(self, require=True):
            return self

        def load(self):
            return StubRawData()

    class StubDataset:
        """Dataset stand-in that records whatever is passed to ``save_dataset``."""

        def __init__(self):
            self.saved_dataset = None
            self.raw_cps = StubRawCPS()

        def save_dataset(self, data):
            self.saved_dataset = data

    class StubMicrosimulation:
        """Microsimulation stand-in returning a fixed two-person frame."""

        def __init__(self, dataset):
            self.dataset = dataset

        def calculate_dataframe(self, columns, year):
            people = pd.DataFrame(
                {
                    "person_id": [1, 2],
                    "household_id": [10, 20],
                    "employment_income": [25_000, 30_000],
                    "age": [30, 45],
                    "household_weight": [1.0, 1.0],
                    "is_female": [False, True],
                }
            )
            # Hand back only the requested columns, like the real call site expects.
            return people[columns]

    class StubTipModel:
        """Tip model stand-in that verifies the tipped flag it is given."""

        def predict(self, X_test, mean_quantile):
            tipped_flags = X_test["is_tipped_occupation"].tolist()
            assert tipped_flags == [True, False]
            return pd.DataFrame({"tip_income": [100.0, 0.0]})

    class StubAssetModel:
        """Asset model stand-in that predicts zero for every asset type."""

        def predict(self, X_test, mean_quantile):
            asset_columns = ("bank_account_assets", "stock_assets", "bond_assets")
            return pd.DataFrame({name: [0.0, 0.0] for name in asset_columns})

    def make_tip_model():
        return StubTipModel()

    def make_asset_model():
        return StubAssetModel()

    monkeypatch.setattr(policyengine_us, "Microsimulation", StubMicrosimulation)
    monkeypatch.setattr(sipp_module, "get_tip_model", make_tip_model)
    monkeypatch.setattr(sipp_module, "get_asset_model", make_asset_model)

    dataset = StubDataset()
    add_tips(dataset, {})

    saved_tips = dataset.saved_dataset["tip_income"].tolist()
    assert saved_tips == [100.0, 0.0]

0 commit comments

Comments
 (0)