Skip to content

Commit 69cc1b7

Browse files
authored
Export FLSA overtime premium proxy (#1133)
* Export FLSA overtime premium proxy
* Share FLSA EAP occupation codes
* Handle categorical FLSA overtime exemptions
* Derive clone FLSA overtime premiums from inputs
* Use year-specific FLSA overtime thresholds
* Source FLSA overtime policy from PE US
* Pin PE US FLSA constants from GitHub
1 parent 8967ce7 commit 69cc1b7

10 files changed

Lines changed: 500 additions & 101 deletions

File tree

.github/scripts/check_policyengine_us_dependency.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
REPO_ROOT = Path(__file__).resolve().parents[2]
1818
PYPI_JSON_TIMEOUT_SECONDS = 20
1919
POLICYENGINE_US = "policyengine-us"
20+
POLICYENGINE_US_GITHUB_REPO = "github.com/PolicyEngine/policyengine-us"
2021
STALE_LOCK_PREFIX = "uv.lock has policyengine-us "
22+
GIT_REF_PREFIX = "uv.lock resolves policyengine-us from a Git ref"
2123

2224

2325
def _annotation(level: str, message: str) -> str:
@@ -83,9 +85,39 @@ def _latest_pypi_version() -> str:
8385
return version
8486

8587

88+
def _is_policyengine_us_git_source(source: dict[str, object]) -> bool:
    """Return whether a locked source entry names the policyengine-us GitHub repo.

    The result is True only when ``source`` has a string value under the
    ``"git"`` key and that string contains ``POLICYENGINE_US_GITHUB_REPO``.
    A missing key or a non-string value yields False.
    """
    git_url = source.get("git")
    if not isinstance(git_url, str):
        return False
    return POLICYENGINE_US_GITHUB_REPO in git_url
91+
92+
93+
def _is_policyengine_us_git_dependency(dependency: str) -> bool:
    """Return whether a dependency string is a commit-pinned GitHub reference.

    All three conditions must hold:

    * the string starts with ``"<POLICYENGINE_US> @ git+"``;
    * it contains ``POLICYENGINE_US_GITHUB_REPO``;
    * it ends with ``@`` followed by exactly 40 lowercase hex characters.
    """
    direct_reference_prefix = f"{POLICYENGINE_US} @ git+"
    if not dependency.startswith(direct_reference_prefix):
        return False
    if POLICYENGINE_US_GITHUB_REPO not in dependency:
        return False
    # Branch or tag names do not match; only a trailing 40-hex-digit ref does.
    pinned_commit = re.search(r"@[0-9a-f]{40}$", dependency)
    return pinned_commit is not None
99+
100+
101+
def _allows_temporary_git_ref(
    locked_version: str,
    source: dict[str, object],
    project_dependency: str,
    latest_version: str | None,
) -> bool:
    """Decide whether a Git-sourced policyengine-us pin is tolerated for now.

    The pin is allowed only when every check below passes, evaluated in this
    order (later checks are skipped once one fails):

    1. ``latest_version`` is known (not ``None``);
    2. ``locked_version`` compares strictly greater than ``latest_version``;
    3. the lock source is the policyengine-us GitHub repository;
    4. the project dependency is a commit-pinned GitHub reference.
    """
    if latest_version is None:
        return False
    # A Git ref is only acceptable while the locked version is ahead of the
    # newest known release.
    if not _compare_versions(locked_version, latest_version) > 0:
        return False
    if not _is_policyengine_us_git_source(source):
        return False
    return _is_policyengine_us_git_dependency(project_dependency)
113+
114+
86115
def check_dependency(root: Path, latest_version: str | None = None) -> list[str]:
87116
locked_version, source = _locked_policyengine_us(root)
88117
project_dependency = _project_policyengine_us_dependency(root)
118+
git_ref_allowed = _allows_temporary_git_ref(
119+
locked_version, source, project_dependency, latest_version
120+
)
89121

90122
violations: list[str] = []
91123
if (
@@ -99,19 +131,23 @@ def check_dependency(root: Path, latest_version: str | None = None) -> list[str]
99131
)
100132

101133
expected_dependency = f"{POLICYENGINE_US}=={locked_version}"
102-
if project_dependency != expected_dependency:
134+
if project_dependency != expected_dependency and not git_ref_allowed:
103135
violations.append(
104136
f"pyproject.toml must pin {expected_dependency} to match uv.lock; "
105137
f"found {project_dependency!r}."
106138
)
107139

108-
if "git" in source:
140+
if "git" in source and not git_ref_allowed:
109141
violations.append(
110-
"uv.lock resolves policyengine-us from a Git ref. Prefer an exact "
142+
f"{GIT_REF_PREFIX}. Prefer an exact "
111143
f"PyPI release pin once policyengine-us {locked_version} is published."
112144
)
113145

114-
if "@" in project_dependency and "git+" in project_dependency:
146+
if (
147+
"@" in project_dependency
148+
and "git+" in project_dependency
149+
and not git_ref_allowed
150+
):
115151
violations.append(
116152
"pyproject.toml pins policyengine-us to a Git ref. Prefer an exact "
117153
"PyPI release pin for production data builds."
@@ -159,8 +195,22 @@ def main() -> int:
159195
return 0
160196

161197
if not violations:
162-
locked_version, _source = _locked_policyengine_us(REPO_ROOT)
198+
locked_version, source = _locked_policyengine_us(REPO_ROOT)
163199
print(f"policyengine-us dependency is current at {locked_version}.")
200+
if _allows_temporary_git_ref(
201+
locked_version,
202+
source,
203+
_project_policyengine_us_dependency(REPO_ROOT),
204+
latest_version,
205+
):
206+
print(
207+
_annotation(
208+
"warning",
209+
f"policyengine-us {locked_version} is temporarily pinned to "
210+
"GitHub because it is newer than the latest PyPI release. "
211+
"Replace it with an exact PyPI release pin once published.",
212+
)
213+
)
164214
return 0
165215

166216
has_blocking_violation = False
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Export a data-backed FLSA overtime premium proxy from CPS and enhanced CPS.

policyengine_us_data/datasets/cps/cps.py

Lines changed: 161 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from contextlib import closing, contextmanager
2+
from functools import lru_cache
23
from importlib.resources import files
34
from policyengine_core.data import Dataset
45
from policyengine_us_data.storage import STORAGE_FOLDER, DOCS_FOLDER
@@ -18,6 +19,11 @@
1819
import pandas as pd
1920
import yaml
2021
from typing import Type
22+
from policyengine_us.model_api import WEEKS_IN_YEAR
23+
from policyengine_us.data.cps import (
24+
CPS_FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES,
25+
CPS_FLSA_OVERTIME_OCCUPATION_CODES,
26+
)
2127
from policyengine_us_data.utils.uprating import (
2228
create_policyengine_uprating_factors_table,
2329
)
@@ -83,6 +89,46 @@
8389
"real_estate_taxes": ["real_estate_taxes_is_allocated"],
8490
}
8591

92+
FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES = (
93+
CPS_FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES
94+
)
95+
FLSA_OVERTIME_OCCUPATION_CODES = CPS_FLSA_OVERTIME_OCCUPATION_CODES
96+
97+
98+
@lru_cache(maxsize=1)
def _policyengine_us_parameters():
    """Return the parameters of a freshly built ``CountryTaxBenefitSystem``.

    The result is memoized, so the tax-benefit system is constructed at most
    once per process. ``policyengine_us`` is imported inside the function
    rather than at module import time.
    """
    from policyengine_us import CountryTaxBenefitSystem

    tax_benefit_system = CountryTaxBenefitSystem()
    return tax_benefit_system.parameters
103+
104+
105+
@lru_cache(maxsize=16)
def _flsa_overtime_policy_for_year(
    time_period: int,
) -> tuple[np.float32, np.float32, np.float32, np.float32, np.float32]:
    """Look up FLSA overtime policy values in effect on January 1 of a year.

    Values are read from the ``gov.irs.income.exemption.overtime`` node of the
    PolicyEngine US parameters and cast to ``np.float32``. Results are cached
    per ``time_period``.

    Returns a 5-tuple, in order:

    1. ``hce_salary_threshold`` as stored;
    2. ``salary_basis_threshold`` multiplied by ``WEEKS_IN_YEAR``;
    3. ``computer_salary_threshold`` multiplied by the hours threshold and
       by ``WEEKS_IN_YEAR``;
    4. ``hours_threshold``;
    5. ``rate_multiplier``.

    NOTE(review): the scaling implies the salary-basis parameter is weekly and
    the computer parameter is hourly — confirm against the parameter files.
    """
    instant = f"{int(time_period)}-01-01"
    overtime = _policyengine_us_parameters()(instant).gov.irs.income.exemption.overtime

    weekly_hours_cap = np.float32(overtime.hours_threshold)
    premium_multiplier = np.float32(overtime.rate_multiplier)
    weeks_per_year = np.float32(WEEKS_IN_YEAR)

    hce_threshold = np.float32(overtime.hce_salary_threshold)
    salary_basis_annualized = np.float32(
        overtime.salary_basis_threshold * weeks_per_year
    )
    computer_annualized = np.float32(
        overtime.computer_salary_threshold * weekly_hours_cap * weeks_per_year
    )
    return (
        hce_threshold,
        salary_basis_annualized,
        computer_annualized,
        weekly_hours_cap,
        premium_multiplier,
    )
124+
125+
126+
def _flsa_overtime_thresholds_for_year(
    time_period: int,
) -> tuple[np.float32, np.float32, np.float32]:
    """Return only the three salary thresholds for ``time_period``.

    These are the first three elements of
    ``_flsa_overtime_policy_for_year(time_period)``; the hours threshold and
    rate multiplier are dropped.
    """
    first, second, third, *_rest = _flsa_overtime_policy_for_year(time_period)
    return (first, second, third)
130+
131+
86132
CURRENT_HEALTH_COVERAGE_REPORTED_VAR_MAP = {
87133
"reported_has_direct_purchase_health_coverage_at_interview": "NOW_DIR",
88134
"reported_has_marketplace_health_coverage_at_interview": "NOW_MRK",
@@ -304,7 +350,7 @@ def generate(self):
304350
logging.info("Adding tips")
305351
add_tips(self, cps)
306352
logging.info("Adding ORG labor-market inputs")
307-
add_org_labor_market_inputs(cps)
353+
add_org_labor_market_inputs(cps, self.time_period)
308354
logging.info("Adding auto loan balance, interest and wealth")
309355
add_auto_loan_interest_and_net_worth(self, cps)
310356
logging.info("Added all variables")
@@ -1190,6 +1236,96 @@ def derive_weeks_worked(weeks_worked: Series | np.ndarray) -> Series | np.ndarra
11901236
return np.clip(weeks_worked, 0, 52)
11911237

11921238

1239+
def derive_flsa_overtime_premium(
    *,
    time_period: int,
    employment_income: Series | np.ndarray,
    hours_worked_last_week: Series | np.ndarray,
    weeks_worked: Series | np.ndarray,
    is_paid_hourly: Series | np.ndarray,
    has_never_worked: Series | np.ndarray,
    is_military: Series | np.ndarray,
    is_executive_administrative_professional: Series | np.ndarray,
    is_farmer_fisher: Series | np.ndarray,
    is_computer_scientist: Series | np.ndarray,
) -> np.ndarray:
    """Estimate the annual FLSA overtime premium from CPS wages and hours.

    CPS ASEC has no week-by-week earnings history, so the premium share is
    computed from the reported/reference week's hours and then applied to
    annual employment income for workers not screened as FLSA-exempt.

    All arguments are keyword-only. Numeric inputs are cast to float32, with
    NaN replaced by 0 and negatives floored at 0; flag inputs are cast to
    bool. Policy values come from ``_flsa_overtime_policy_for_year``.

    A worker is screened as exempt when they have never worked or are
    military, or when they are not paid hourly and their income is at or
    above the salary threshold for their occupation group. Workers with zero
    weeks worked receive no premium.

    Returns:
        A float32 array of premiums, never exceeding the cleaned employment
        income.
    """

    def _nonnegative(values) -> np.ndarray:
        # float32 cast, NaN -> 0, then floor at zero.
        cleaned = np.nan_to_num(np.asarray(values, dtype=np.float32), nan=0)
        return np.maximum(cleaned, 0)

    def _flags(values) -> np.ndarray:
        return np.asarray(values, dtype=bool)

    annual_wages = _nonnegative(employment_income)
    weekly_hours = _nonnegative(hours_worked_last_week)
    weeks = _nonnegative(weeks_worked)
    hourly = _flags(is_paid_hourly)
    never_worked = _flags(has_never_worked)
    military = _flags(is_military)
    eap = _flags(is_executive_administrative_professional)
    farmer_fisher = _flags(is_farmer_fisher)
    computer = _flags(is_computer_scientist)

    (
        hce_cutoff,
        salary_basis_cutoff,
        computer_cutoff,
        hours_cap,
        multiplier,
    ) = _flsa_overtime_policy_for_year(time_period)

    # Share of weekly pay that is premium: the extra (multiplier - 1) paid on
    # hours above the cap, relative to total pay in straight-time hours.
    hours_over_cap = np.maximum(weekly_hours - hours_cap, 0)
    paid_hour_equivalents = (
        np.minimum(weekly_hours, hours_cap) + hours_over_cap * multiplier
    )
    share = np.zeros_like(annual_wages, dtype=np.float32)
    np.divide(
        (multiplier - 1) * hours_over_cap,
        paid_hour_equivalents,
        out=share,
        where=paid_hour_equivalents > 0,
    )

    unconditional = never_worked | military

    # Start from the HCE threshold; later overrides win over earlier ones.
    # Group thresholds are capped at the HCE threshold.
    overrides = (
        (computer, min(computer_cutoff, hce_cutoff)),
        (eap | farmer_fisher, min(salary_basis_cutoff, hce_cutoff)),
        (unconditional, 0),
    )
    threshold = np.full_like(annual_wages, hce_cutoff, dtype=np.float32)
    for mask, cutoff in overrides:
        threshold = np.where(mask, cutoff, threshold)

    salaried_at_or_above = ~hourly & (annual_wages >= threshold)
    exempt = unconditional | salaried_at_or_above
    qualifies = (weeks > 0) & ~exempt

    premium = np.where(qualifies, annual_wages * share, 0)
    return np.minimum(premium, annual_wages).astype(np.float32)
1327+
1328+
11931329
@pipeline_node(
11941330
PipelineNode(
11951331
id="add_personal_income_variables",
@@ -2824,16 +2960,19 @@ def add_tips(self, cps: h5py.File):
28242960
id="add_org_inputs",
28252961
label="ORG Labor-Market Inputs",
28262962
node_type="library",
2827-
description="Impute hourly wage, hourly-pay status, and union coverage from CPS ORG donors.",
2963+
description=(
2964+
"Impute hourly wage, hourly-pay status, and union coverage from CPS "
2965+
"ORG donors, then derive FLSA overtime premium."
2966+
),
28282967
source_file="policyengine_us_data/datasets/cps/cps.py",
28292968
status="current",
28302969
stability="moving",
28312970
pathways=["data_build"],
28322971
validation_commands=["uv run pytest tests/unit/datasets/test_org.py"],
28332972
)
28342973
)
2835-
def add_org_labor_market_inputs(cps: h5py.File) -> None:
2836-
"""Impute ORG-derived wage and union inputs onto CPS persons."""
2974+
def add_org_labor_market_inputs(cps: h5py.File, time_period: int) -> None:
2975+
"""Impute ORG-derived labor-market inputs and derive overtime premium."""
28372976
n_persons = len(np.asarray(cps["age"]))
28382977
household_ids = np.asarray(cps["household_id"], dtype=np.int64)
28392978
person_household_ids = np.asarray(
@@ -2890,48 +3029,32 @@ def add_org_labor_market_inputs(cps: h5py.File) -> None:
28903029
else:
28913030
cps[variable] = values.astype(np.float32)
28923031

3032+
cps["fsla_overtime_premium"] = derive_flsa_overtime_premium(
3033+
time_period=time_period,
3034+
employment_income=cps["employment_income"],
3035+
hours_worked_last_week=cps["hours_worked_last_week"],
3036+
weeks_worked=cps["weeks_worked"],
3037+
is_paid_hourly=cps["is_paid_hourly"],
3038+
has_never_worked=cps["has_never_worked"],
3039+
is_military=cps["is_military"],
3040+
is_executive_administrative_professional=cps[
3041+
"is_executive_administrative_professional"
3042+
],
3043+
is_farmer_fisher=cps["is_farmer_fisher"],
3044+
is_computer_scientist=cps["is_computer_scientist"],
3045+
)
3046+
28933047

28943048
def add_overtime_occupation(cps: h5py.File, person: DataFrame) -> None:
28953049
"""Add occupation categories relevant to overtime eligibility calculations.
28963050
Based on:
28973051
https://www.law.cornell.edu/uscode/text/29/213
28983052
https://www.congress.gov/crs-product/IF12480
28993053
"""
2900-
cps["has_never_worked"] = person.POCCU2 == 53
2901-
cps["is_military"] = person.POCCU2 == 52
2902-
cps["is_computer_scientist"] = person.POCCU2 == 8
2903-
cps["is_farmer_fisher"] = person.POCCU2 == 41
3054+
for variable, occupation_code in FLSA_OVERTIME_OCCUPATION_CODES.items():
3055+
cps[variable] = person.POCCU2 == occupation_code
29043056
cps["is_executive_administrative_professional"] = person.POCCU2.isin(
2905-
[
2906-
1, # Chief executives, and managers
2907-
2, # Compensation, human resources, and infrastructure managers
2908-
3, # All other managers
2909-
5, # Business operations specialists
2910-
6, # Accountants and auditors
2911-
7, # Financial specialists
2912-
9, # Mathematical science occupations
2913-
10, # Architects, except naval
2914-
11, # Surveyors, cartographers, & photogrammetrists
2915-
12, # Engineering technologists and technicians
2916-
13, # Earth scientists
2917-
14, # Economists
2918-
15, # Psychologists, and other social scientists
2919-
16, # Health and safety specialists
2920-
18, # Lawyers, judges, magistrates, and other judicial workers
2921-
19, # Paralegals and all other legal support workers
2922-
25, # Registered nurses, therapists, and specific pathologists
2923-
26, # Veterinarians
2924-
27, # Health technicians and other healthcare practitioners
2925-
28, # Healthcare support occupations
2926-
29, # First-line supervisors of protective service workers
2927-
34, # First-line supervisors of housekeeping and janitorial workers
2928-
36, # Supervisors of personal care and service workers
2929-
38, # First-line supervisors of retail/non-retail sales workers
2930-
39, # Sales and related occupations
2931-
40, # Office & administrative support occupations
2932-
42, # First-line supervisors of construction trades workers
2933-
50, # Supervisors of transportation and flight related workers
2934-
]
3057+
FLSA_EXECUTIVE_ADMINISTRATIVE_PROFESSIONAL_OCCUPATION_CODES
29353058
)
29363059

29373060

0 commit comments

Comments
 (0)