|
1 | 1 | from contextlib import closing, contextmanager |
| 2 | +from functools import lru_cache |
2 | 3 | from importlib.resources import files |
3 | 4 | from policyengine_core.data import Dataset |
4 | 5 | from policyengine_us_data.storage import STORAGE_FOLDER, DOCS_FOLDER |
|
18 | 19 | import pandas as pd |
19 | 20 | import yaml |
20 | 21 | from typing import Type |
| 22 | +from policyengine_us.model_api import WEEKS_IN_YEAR |
| 23 | +from policyengine_us.data.cps import ( |
| 24 | + CPS_FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES, |
| 25 | + CPS_FLSA_OVERTIME_OCCUPATION_CODES, |
| 26 | +) |
21 | 27 | from policyengine_us_data.utils.uprating import ( |
22 | 28 | create_policyengine_uprating_factors_table, |
23 | 29 | ) |
|
83 | 89 | "real_estate_taxes": ["real_estate_taxes_is_allocated"], |
84 | 90 | } |
85 | 91 |
|
# Module-level aliases for the occupation-code tables imported from
# policyengine_us.data.cps, so code in this module (and its importers) can use
# the shorter FLSA_* names.
FLSA_OVERTIME_OCCUPATION_CODES = CPS_FLSA_OVERTIME_OCCUPATION_CODES
FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES = (
    CPS_FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES
)
| 96 | + |
| 97 | + |
@lru_cache(maxsize=1)
def _policyengine_us_parameters():
    """Return the ``parameters`` attribute of a new ``CountryTaxBenefitSystem``.

    The result is cached, so repeated calls reuse the object produced by the
    first call instead of constructing the system again.
    """
    # Imported at call time rather than at module import.
    from policyengine_us import CountryTaxBenefitSystem

    tax_benefit_system = CountryTaxBenefitSystem()
    return tax_benefit_system.parameters
| 103 | + |
| 104 | + |
@lru_cache(maxsize=16)
def _flsa_overtime_policy_for_year(
    time_period: int,
) -> tuple[np.float32, np.float32, np.float32, np.float32, np.float32]:
    """Read the overtime policy parameters in effect on January 1 of a year.

    Args:
        time_period: Year to look up; it is passed through ``int()`` and the
            parameters are evaluated at ``"<year>-01-01"``.

    Returns:
        A 5-tuple of ``np.float32`` values, in this order:

        1. ``hce_salary_threshold`` as stored;
        2. ``salary_basis_threshold`` multiplied by ``WEEKS_IN_YEAR``;
        3. ``computer_salary_threshold`` multiplied by the hours threshold
           and by ``WEEKS_IN_YEAR``;
        4. ``hours_threshold``;
        5. ``rate_multiplier``.

    Results are cached per ``time_period`` (up to 16 distinct values).
    """
    instant = f"{int(time_period)}-01-01"
    parameters_at_instant = _policyengine_us_parameters()(instant)
    overtime = parameters_at_instant.gov.irs.income.exemption.overtime

    hours_cap = np.float32(overtime.hours_threshold)
    premium_rate = np.float32(overtime.rate_multiplier)
    weeks = np.float32(WEEKS_IN_YEAR)

    hce_threshold = np.float32(overtime.hce_salary_threshold)
    # NOTE(review): the scaling below presumably converts a weekly amount and
    # an hourly amount to annual figures -- confirm against the parameter
    # definitions in policyengine_us.
    salary_basis_scaled = np.float32(overtime.salary_basis_threshold * weeks)
    computer_scaled = np.float32(
        overtime.computer_salary_threshold * hours_cap * weeks
    )

    return (
        hce_threshold,
        salary_basis_scaled,
        computer_scaled,
        hours_cap,
        premium_rate,
    )
| 124 | + |
| 125 | + |
def _flsa_overtime_thresholds_for_year(
    time_period: int,
) -> tuple[np.float32, np.float32, np.float32]:
    """Return the first three values of ``_flsa_overtime_policy_for_year``.

    These are the three salary thresholds; the trailing hours threshold and
    rate multiplier are dropped.
    """
    hce, salary_basis, computer, *_unused = _flsa_overtime_policy_for_year(
        time_period
    )
    return hce, salary_basis, computer
| 130 | + |
| 131 | + |
86 | 132 | CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = { |
87 | 133 | "reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR", |
88 | 134 | "reported_has_marketplace_health_coverage_at_interview": "NOW_MRK", |
@@ -304,7 +350,7 @@ def generate(self): |
304 | 350 | logging.info("Adding tips") |
305 | 351 | add_tips(self, cps) |
306 | 352 | logging.info("Adding ORG labor-market inputs") |
307 | | - add_org_labor_market_inputs(cps) |
| 353 | + add_org_labor_market_inputs(cps, self.time_period) |
308 | 354 | logging.info("Adding auto loan balance, interest and wealth") |
309 | 355 | add_auto_loan_interest_and_net_worth(self, cps) |
310 | 356 | logging.info("Added all variables") |
@@ -1190,6 +1236,96 @@ def derive_weeks_worked(weeks_worked: Series | np.ndarray) -> Series | np.ndarra |
1190 | 1236 | return np.clip(weeks_worked, 0, 52) |
1191 | 1237 |
|
1192 | 1238 |
|
def derive_flsa_overtime_premium(
    *,
    time_period: int,
    employment_income: Series | np.ndarray,
    hours_worked_last_week: Series | np.ndarray,
    weeks_worked: Series | np.ndarray,
    is_paid_hourly: Series | np.ndarray,
    has_never_worked: Series | np.ndarray,
    is_military: Series | np.ndarray,
    is_executive_administrative_professional: Series | np.ndarray,
    is_farmer_fisher: Series | np.ndarray,
    is_computer_scientist: Series | np.ndarray,
) -> np.ndarray:
    """Proxy annual FLSA overtime premium from CPS annual wages and hours.

    CPS ASEC has no week-by-week earnings history, so the premium share
    implied by the reported/reference week is computed and then applied to
    annual employment income for workers not screened as FLSA-exempt.

    With ``h`` hours in the reference week, hours threshold ``T`` and rate
    multiplier ``m``, the share is::

        (m - 1) * max(h - T, 0) / (min(h, T) + m * max(h - T, 0))

    and is 0 wherever the denominator is not positive.

    Exemption screen:

    * ``has_never_worked`` or ``is_military``: always exempt.
    * Otherwise exempt when not paid hourly and employment income is at or
      above the applicable threshold. That threshold is the HCE threshold by
      default, ``min(computer threshold, HCE)`` for computer scientists, and
      ``min(salary-basis threshold, HCE)`` for executive/administrative/
      professional or farmer/fisher records. The last rule is applied after
      the computer-scientist rule, so it wins when both flags are set.

    Args:
        time_period: Year whose policy values are fetched from
            ``_flsa_overtime_policy_for_year``.
        employment_income, hours_worked_last_week, weeks_worked: Numeric
            inputs; each is cast to float32, NaN becomes 0 and negatives are
            floored at 0.
        is_paid_hourly, has_never_worked, is_military,
        is_executive_administrative_professional, is_farmer_fisher,
        is_computer_scientist: Flags, cast to ``bool`` arrays.

    Returns:
        float32 array of premiums. Entries are 0 for exempt records and for
        records with ``weeks_worked <= 0``, and never exceed the (sanitised)
        employment income.
    """

    def _non_negative_float32(values) -> np.ndarray:
        # Cast to float32, replace NaN with 0, then floor at 0.
        as_float = np.asarray(values, dtype=np.float32)
        return np.maximum(np.nan_to_num(as_float, nan=0), 0)

    def _as_flag(values) -> np.ndarray:
        return np.asarray(values, dtype=bool)

    employment_income = _non_negative_float32(employment_income)
    hours_worked_last_week = _non_negative_float32(hours_worked_last_week)
    weeks_worked = _non_negative_float32(weeks_worked)

    is_paid_hourly = _as_flag(is_paid_hourly)
    has_never_worked = _as_flag(has_never_worked)
    is_military = _as_flag(is_military)
    is_executive_administrative_professional = _as_flag(
        is_executive_administrative_professional
    )
    is_farmer_fisher = _as_flag(is_farmer_fisher)
    is_computer_scientist = _as_flag(is_computer_scientist)

    policy = _flsa_overtime_policy_for_year(time_period)
    (
        hce_salary_threshold,
        salary_basis_threshold,
        computer_salary_threshold,
        hours_threshold,
        rate_multiplier,
    ) = policy

    # Split the reference week into hours up to the threshold and hours above.
    overtime_hours = np.maximum(hours_worked_last_week - hours_threshold, 0)
    regular_hours = np.minimum(hours_worked_last_week, hours_threshold)
    straight_time_equivalent_hours = (
        regular_hours + overtime_hours * rate_multiplier
    )

    # Share of pay that is premium. Positions with a non-positive denominator
    # are skipped by `where` and keep the 0 pre-filled in `out`.
    premium_hours = (rate_multiplier - 1) * overtime_hours
    has_hours = straight_time_equivalent_hours > 0
    premium_share = np.divide(
        premium_hours,
        straight_time_equivalent_hours,
        out=np.zeros_like(employment_income, dtype=np.float32),
        where=has_hours,
    )

    # Applicable salary threshold per record; later rules override earlier.
    computer_cap = min(computer_salary_threshold, hce_salary_threshold)
    salary_basis_cap = min(salary_basis_threshold, hce_salary_threshold)
    uses_salary_basis = (
        is_executive_administrative_professional | is_farmer_fisher
    )
    always_exempt = has_never_worked | is_military

    salary_threshold = np.full_like(
        employment_income,
        hce_salary_threshold,
        dtype=np.float32,
    )
    salary_threshold = np.where(
        is_computer_scientist, computer_cap, salary_threshold
    )
    salary_threshold = np.where(
        uses_salary_basis, salary_basis_cap, salary_threshold
    )
    salary_threshold = np.where(always_exempt, 0, salary_threshold)

    # Hourly-paid workers are never screened out by the salary test.
    salaried_at_or_above_threshold = (
        employment_income >= salary_threshold
    ) & ~is_paid_hourly
    is_exempt = always_exempt | salaried_at_or_above_threshold
    worked_during_year = weeks_worked > 0
    eligible = ~is_exempt & worked_during_year

    premium = np.where(eligible, employment_income * premium_share, 0)
    capped_premium = np.minimum(premium, employment_income)
    return capped_premium.astype(np.float32)
| 1327 | + |
| 1328 | + |
1193 | 1329 | @pipeline_node( |
1194 | 1330 | PipelineNode( |
1195 | 1331 | id="add_personal_income_variables", |
@@ -2824,16 +2960,19 @@ def add_tips(self, cps: h5py.File): |
2824 | 2960 | id="add_org_inputs", |
2825 | 2961 | label="ORG Labor-Market Inputs", |
2826 | 2962 | node_type="library", |
2827 | | - description="Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors.", |
| 2963 | + description=( |
| 2964 | + "Impute hourly wage, hourly-pay status, and union coverage from CPS " |
| 2965 | + "ORG donors, then derive FLSA overtime premium." |
| 2966 | + ), |
2828 | 2967 | source_file="policyengine_us_data/datasets/cps/cps.py", |
2829 | 2968 | status="current", |
2830 | 2969 | stability="moving", |
2831 | 2970 | pathways=["data_build"], |
2832 | 2971 | validation_commands=["uv run pytest tests/unit/datasets/test_org.py"], |
2833 | 2972 | ) |
2834 | 2973 | ) |
2835 | | -def add_org_labor_market_inputs(cps: h5py.File) -> None: |
2836 | | - """Impute ORG-derived wage and union inputs onto CPS persons.""" |
| 2974 | +def add_org_labor_market_inputs(cps: h5py.File, time_period: int) -> None: |
| 2975 | + """Impute ORG-derived labor-market inputs and derive overtime premium.""" |
2837 | 2976 | n_persons = len(np.asarray(cps["age"])) |
2838 | 2977 | household_ids = np.asarray(cps["household_id"], dtype=np.int64) |
2839 | 2978 | person_household_ids = np.asarray( |
@@ -2890,48 +3029,32 @@ def add_org_labor_market_inputs(cps: h5py.File) -> None: |
2890 | 3029 | else: |
2891 | 3030 | cps[variable] = values.astype(np.float32) |
2892 | 3031 |
|
| 3032 | + cps["fsla_overtime_premium"] = derive_flsa_overtime_premium( |
| 3033 | + time_period=time_period, |
| 3034 | + employment_income=cps["employment_income"], |
| 3035 | + hours_worked_last_week=cps["hours_worked_last_week"], |
| 3036 | + weeks_worked=cps["weeks_worked"], |
| 3037 | + is_paid_hourly=cps["is_paid_hourly"], |
| 3038 | + has_never_worked=cps["has_never_worked"], |
| 3039 | + is_military=cps["is_military"], |
| 3040 | + is_executive_administrative_professional=cps[ |
| 3041 | + "is_executive_administrative_professional" |
| 3042 | + ], |
| 3043 | + is_farmer_fisher=cps["is_farmer_fisher"], |
| 3044 | + is_computer_scientist=cps["is_computer_scientist"], |
| 3045 | + ) |
| 3046 | + |
2893 | 3047 |
|
def add_overtime_occupation(cps: h5py.File, person: DataFrame) -> None:
    """Write occupation flags used by the overtime eligibility calculations.

    For every ``variable -> code`` pair in ``FLSA_OVERTIME_OCCUPATION_CODES``
    a boolean column ``cps[variable]`` is stored, true where
    ``person.POCCU2`` equals that code. In addition,
    ``cps["is_executive_administrative_professional"]`` is true where
    ``person.POCCU2`` is one of
    ``FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES``.

    Based on:
    https://www.law.cornell.edu/uscode/text/29/213
    https://www.congress.gov/crs-product/IF12480
    """
    occupation = person.POCCU2

    # One equality flag per single-code occupation category.
    for variable, occupation_code in FLSA_OVERTIME_OCCUPATION_CODES.items():
        cps[variable] = occupation == occupation_code

    # Membership flag for the multi-code category.
    is_eap = occupation.isin(
        FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES
    )
    cps["is_executive_administrative_professional"] = is_eap
2936 | 3059 |
|
2937 | 3060 |
|
|
0 commit comments