Skip to content

Commit 1fa32bd

Browse files
committed
Add Treasury tipped occupation codes to CPS data
1 parent 86ac0bc commit 1fa32bd

4 files changed

Lines changed: 107 additions & 0 deletions

File tree

policyengine_us_data/datasets/cps/cps.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
prioritize_reported_recipients,
2020
)
2121
from policyengine_us_data.utils.randomness import seeded_rng
22+
from policyengine_us_data.datasets.cps.tipped_occupation import (
23+
derive_treasury_tipped_occupation_code,
24+
)
2225

2326

2427
class CPS(Dataset):
@@ -479,6 +482,9 @@ def children_per_parent(col: str) -> pd.DataFrame:
479482
cps["is_full_time_college_student"] = person.A_HSCOL == 2
480483

481484
cps["detailed_occupation_recode"] = person.POCCU2
485+
cps["treasury_tipped_occupation_code"] = derive_treasury_tipped_occupation_code(
486+
person.PEIOOCC
487+
)
482488
add_overtime_occupation(cps, person)
483489

484490

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ def _supports_structural_mortgage_inputs() -> bool:
3737
"is_hispanic",
3838
"detailed_occupation_recode",
3939
]
40+
if has_policyengine_us_variables("treasury_tipped_occupation_code"):
41+
CPS_CLONE_FEATURE_VARIABLES.append("treasury_tipped_occupation_code")
4042

4143
# Predictors used to rematch CPS features onto the PUF clone half.
4244
# These are all available on the CPS half and on the doubled extended CPS.

policyengine_us_data/datasets/cps/tipped_occupation.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
# Lookup table: 2018 Census occupation code -> Treasury Tipped Occupation
# Code (TTOC).
#
# Derived by joining:
# 1. Treasury Tipped Occupation Codes (TTOCs) and related 2018 SOC codes from
#    the IRS "Occupations that customarily and regularly received tips on or
#    before December 31, 2024" list / IRB 2025-42.
# 2. The Census Bureau 2018 occupation code list crosswalk from 2018 Census
#    occupation code to 2018 SOC code.
#
# A few IRS SOC entries correspond to multiple TTOCs. For those collisions we
# pick one representative TTOC because the current policyengine-us logic only
# needs to distinguish listed occupations (TTOC > 0) from unlisted ones. The
# more detailed approximation work belongs here in policyengine-us-data, not in
# policyengine-us.
#
# The mapping is many-to-one: several Census codes can share a TTOC (for
# example 4010, 4030 and 4160 all map to 106). Census codes that do not appear
# as keys are unlisted occupations.
CENSUS_OCCUPATION_CODE_TO_TTOC = {
    725: 502,
    2350: 507,
    2633: 502,
    2752: 206,
    2755: 207,
    2770: 208,
    2910: 503,
    3602: 501,
    3630: 602,
    4000: 105,
    4010: 106,
    4030: 106,
    4040: 101,
    4055: 107,
    4110: 102,
    4120: 103,
    4130: 104,
    4140: 108,
    4150: 109,
    4160: 106,
    4230: 304,
    4251: 402,
    4350: 506,
    4420: 210,
    4500: 603,
    4510: 603,
    4521: 605,
    4522: 601,
    4600: 508,
    4621: 607,
    4655: 501,
    5130: 203,
    5300: 303,
    6355: 403,
    6442: 404,
    7120: 401,
    7200: 409,
    7315: 405,
    7320: 406,
    7340: 401,
    7540: 408,
    7610: 401,
    7800: 110,
    8510: 401,
    9122: 806,
    9141: 803,
    9142: 802,
    9350: 801,
    9610: 805,
    9620: 809,
}
70+
71+
72+
def derive_treasury_tipped_occupation_code(
    census_occupation_codes: pd.Series | np.ndarray,
) -> np.ndarray:
    """Map CPS PEIOOCC detailed occupation codes to Treasury tipped codes.

    Args:
        census_occupation_codes: Census occupation codes, one per record.
            Values are parsed with ``pd.to_numeric``; anything that cannot be
            parsed becomes missing.

    Returns:
        A one-dimensional ``int16`` array with one element per input value,
        in input order. Each element is the TTOC found in
        ``CENSUS_OCCUPATION_CODE_TO_TTOC``, or ``0`` when the input is
        missing, non-numeric, non-finite, not a whole number, or simply not
        a key of the table.
    """

    parsed = pd.to_numeric(
        pd.Series(census_occupation_codes, copy=False), errors="coerce"
    )
    # Work in float64 so NaN/inf can be detected before any integer cast;
    # casting those straight to int raises, and casting a fractional value
    # truncates it onto a neighbouring (possibly listed) code.
    as_float = parsed.to_numpy(dtype="float64", na_value=np.nan)
    is_exact_code = (
        np.isfinite(as_float)
        & (as_float == np.trunc(as_float))
        # Keep the int64 cast below well-defined for absurdly large inputs.
        & (np.abs(as_float) <= np.iinfo(np.int32).max)
    )
    # -1 is never a key of the lookup table, so it always resolves to 0.
    lookup_keys = np.where(is_exact_code, as_float, -1).astype(np.int64)
    return (
        pd.Series(lookup_keys)
        .map(CENSUS_OCCUPATION_CODE_TO_TTOC)
        .fillna(0)
        .astype(np.int16)
        .to_numpy()
    )

policyengine_us_data/tests/test_extended_cps.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
apply_retirement_constraints,
2727
reconcile_ss_subcomponents,
2828
)
29+
from policyengine_us_data.datasets.cps.tipped_occupation import (
30+
derive_treasury_tipped_occupation_code,
31+
)
2932

3033

3134
class TestVariableListConsistency:
@@ -201,6 +204,15 @@ def test_se_pension_zeroed_without_se_income(
201204
).all(), "SE pension should be zero without SE income"
202205

203206

207+
class TestTreasuryTippedOccupationCode:
    """Unit tests for ``derive_treasury_tipped_occupation_code``."""

    def test_derive_treasury_tipped_occupation_code(self):
        """Listed Census codes map to their TTOC; unlisted codes map to 0."""
        derived = derive_treasury_tipped_occupation_code(
            np.array([4040, 4110, 4230, 2770, -1, 9999])
        )

        assert derived.tolist() == [101, 102, 304, 208, 0, 0]

    def test_output_is_int16_array(self):
        """The result is a NumPy array with the compact int16 dtype."""
        derived = derive_treasury_tipped_occupation_code(np.array([4040, 9999]))

        assert isinstance(derived, np.ndarray)
        assert derived.dtype == np.int16

    def test_missing_and_non_numeric_codes_map_to_zero(self):
        """NaN, None and unparseable strings fall back to 0 (unlisted)."""
        from_floats = derive_treasury_tipped_occupation_code(
            np.array([4040.0, np.nan])
        )
        from_objects = derive_treasury_tipped_occupation_code(
            np.array(["4040", "not-a-code", None], dtype=object)
        )

        assert from_floats.tolist() == [101, 0]
        assert from_objects.tolist() == [101, 0, 0]

    def test_multiple_census_codes_can_share_a_ttoc(self):
        """The lookup is many-to-one for some Census codes."""
        derived = derive_treasury_tipped_occupation_code(
            np.array([4010, 4030, 4160])
        )

        assert derived.tolist() == [106, 106, 106]
214+
215+
204216
class TestSSReconciliation:
205217
"""Post-processing SS normalization ensures sub-components sum to total."""
206218

@@ -449,6 +461,7 @@ def test_clone_feature_imputation_rematches_outputs_and_derives_flags(
449461
"cps_race": [2, 1],
450462
"is_hispanic": [0, 1],
451463
"detailed_occupation_recode": [8, 41],
464+
"treasury_tipped_occupation_code": [101, 304],
452465
}
453466
)
454467

@@ -486,5 +499,7 @@ def calculate_dataframe(self, columns):
486499
assert result["is_male"].tolist() == [1, 0]
487500
assert result["cps_race"].tolist() == [2, 1]
488501
assert result["is_hispanic"].tolist() == [0, 1]
502+
if "treasury_tipped_occupation_code" in result.columns:
503+
assert result["treasury_tipped_occupation_code"].tolist() == [101, 304]
489504
assert result["is_computer_scientist"].tolist() == [True, False]
490505
assert result["is_farmer_fisher"].tolist() == [False, True]

0 commit comments

Comments
 (0)