Skip to content

Commit 97ee509

Browse files
committed
Refine SIPP SSI disability training filters
1 parent 8967ce7 commit 97ee509

2 files changed

Lines changed: 103 additions & 12 deletions

File tree

policyengine_us_data/datasets/sipp/sipp.py

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848

4949
SSI_DISABILITY_CRITERIA_VARIABLE = "meets_ssi_disability_criteria"
5050
SSI_DISABILITY_MODEL_VARIABLE = SSI_DISABILITY_CRITERIA_VARIABLE
51-
SSI_DISABILITY_MODEL_VERSION = 6
51+
SSI_DISABILITY_MODEL_VERSION = 7
5252
SSI_DISABILITY_EXPORT_VARIABLES = (SSI_DISABILITY_CRITERIA_VARIABLE,)
5353

5454
# These six CPS/SIPP difficulty items are construction-time predictors for the
@@ -459,6 +459,42 @@ def _yes(df: pd.DataFrame, column: str) -> pd.Series:
459459
return values.fillna(0).astype(float).eq(1)
460460

461461

462+
def _sipp_monthly_earned_income(df: pd.DataFrame) -> pd.Series:
    """Return a per-row monthly earned-income proxy for a SIPP frame.

    Sums whichever ``ASSET_JOB_EARNINGS_COLUMNS`` are present in ``df``,
    counting missing values as zero. When none of those columns is present,
    the zero-filled ``TPTOTINC`` column is returned instead.
    """
    available_job_columns = [
        name for name in ASSET_JOB_EARNINGS_COLUMNS if name in df
    ]
    if not available_job_columns:
        # No job-level earnings columns: fall back to the total income column.
        return df["TPTOTINC"].fillna(0)
    job_earnings = df[available_job_columns].fillna(0)
    return job_earnings.sum(axis=1)
468+
469+
470+
def _sipp_monthly_unearned_income(
    df: pd.DataFrame, monthly_earned_income: pd.Series
) -> pd.Series:
    """Return monthly unearned income as ``TPTOTINC`` minus earned income.

    Missing ``TPTOTINC`` values count as zero, and the difference is floored
    at zero so earned income above the reported total never yields a negative
    unearned amount.
    """
    total_income = df["TPTOTINC"].fillna(0)
    residual_income = total_income - monthly_earned_income
    return residual_income.clip(lower=0)
475+
476+
477+
def _approximate_monthly_ssi_countable_income(
    monthly_earned_income: pd.Series,
    monthly_unearned_income: pd.Series,
    *,
    general_exclusion: float,
    earned_exclusion: float,
    earned_share_excluded: float,
) -> pd.Series:
    """Apply standard SSI income exclusions to monthly SIPP income proxies.

    The general exclusion is applied to unearned income first; any part of it
    that unearned income does not absorb is then deducted from earned income
    together with the flat earned-income exclusion. The remaining earned
    income is reduced by ``earned_share_excluded``.

    Args:
        monthly_earned_income: Monthly earned income per record.
        monthly_unearned_income: Monthly unearned income per record.
        general_exclusion: Flat amount excluded from unearned income first.
        earned_exclusion: Flat amount excluded from earned income.
        earned_share_excluded: Share (0-1) of the remaining earned income
            that is excluded.

    Returns:
        Countable unearned plus countable earned income per record.
    """
    # Floor unearned income at zero before applying the general exclusion.
    # Otherwise a negative value makes the "applied" exclusion negative, which
    # inflates the leftover carried to earned income beyond the exclusion.
    nonnegative_unearned = np.maximum(monthly_unearned_income, 0)
    applied_general = np.minimum(general_exclusion, nonnegative_unearned)
    countable_unearned = nonnegative_unearned - applied_general
    leftover_general = general_exclusion - applied_general

    # Flat exclusions cannot push earned income below zero.
    earned_after_flat_exclusions = (
        monthly_earned_income - earned_exclusion - leftover_general
    ).clip(lower=0)
    countable_earned = earned_after_flat_exclusions * (1 - earned_share_excluded)

    return countable_unearned + countable_earned
496+
497+
462498
def _add_ssi_disability_difficulty_predictors(df: pd.DataFrame) -> None:
463499
for predictor, source_column in SIPP_SSI_DISABILITY_DIFFICULTY_COLUMNS.items():
464500
df[predictor] = _yes(df, source_column)
@@ -491,25 +527,35 @@ def _observed_ssi_disability_label_mask(
491527
def _ssi_financial_candidate_mask(
492528
df: pd.DataFrame, time_period: int = 2024
493529
) -> pd.Series:
494-
"""Approximate non-disability SSI financial eligibility in SIPP.
530+
"""Approximate non-disability SSI screening eligibility in SIPP.
495531
496532
This is only a training-frame screen. It avoids treating people whose
497-
resources or income make SSI receipt structurally unlikely as clean
498-
non-disabled labels.
533+
resources, countable income, or SGA-level earnings make SSI receipt
534+
structurally unlikely as clean non-disabled labels.
499535
"""
500536
try:
501537
from policyengine_us import CountryTaxBenefitSystem
502538

503-
p = CountryTaxBenefitSystem().parameters(f"{time_period}-01-01").gov.ssa.ssi
539+
parameters = CountryTaxBenefitSystem().parameters(f"{time_period}-01-01")
540+
p = parameters.gov.ssa.ssi
504541
individual_resource_limit = float(p.eligibility.resources.limit.individual)
505542
couple_resource_limit = float(p.eligibility.resources.limit.couple)
506543
individual_fbr = float(p.amount.individual)
507544
couple_fbr = float(p.amount.couple)
545+
income_exclusions = p.income.exclusions
546+
general_exclusion = float(income_exclusions.general)
547+
earned_exclusion = float(income_exclusions.earned)
548+
earned_share_excluded = float(income_exclusions.earned_share)
549+
non_blind_sga = float(parameters.gov.ssa.sga.non_blind)
508550
except Exception:
509551
individual_resource_limit = 2_000.0
510552
couple_resource_limit = 3_000.0
511553
individual_fbr = 943.0
512554
couple_fbr = 1_415.0
555+
general_exclusion = 20.0
556+
earned_exclusion = 65.0
557+
earned_share_excluded = 0.5
558+
non_blind_sga = 1_550.0
513559

514560
resource_limit = np.where(
515561
df["is_married"].astype(bool),
@@ -526,9 +572,23 @@ def _ssi_financial_candidate_mask(
526572
+ df["stock_assets"].fillna(0)
527573
+ df["bond_assets"].fillna(0)
528574
)
529-
monthly_income = df["TPTOTINC"].fillna(0)
530-
return (liquid_resources <= resource_limit) & (
531-
monthly_income <= monthly_income_limit * 2
575+
monthly_earned_income = _sipp_monthly_earned_income(df)
576+
monthly_unearned_income = _sipp_monthly_unearned_income(df, monthly_earned_income)
577+
monthly_countable_income = _approximate_monthly_ssi_countable_income(
578+
monthly_earned_income,
579+
monthly_unearned_income,
580+
general_exclusion=general_exclusion,
581+
earned_exclusion=earned_exclusion,
582+
earned_share_excluded=earned_share_excluded,
583+
)
584+
difficulty_seeing = df.get("difficulty_seeing", _yes(df, "ESEEING"))
585+
is_blind = pd.Series(difficulty_seeing, index=df.index).fillna(False).astype(bool)
586+
passes_sga_gate = is_blind | monthly_earned_income.le(non_blind_sga)
587+
588+
return (
589+
(liquid_resources <= resource_limit)
590+
& monthly_countable_income.le(monthly_income_limit)
591+
& passes_sga_gate
532592
)
533593

534594

@@ -544,7 +604,7 @@ def build_ssi_disability_training_frame(
544604
df["age"] = df.TAGE
545605
df["is_female"] = df.ESEX == 2
546606
df["is_married"] = df.EMS == 1
547-
df["employment_income"] = df.TPTOTINC.fillna(0) * 12
607+
df["employment_income"] = _sipp_monthly_earned_income(df) * 12
548608
df["interest_income"] = (df["TINC_BANK"].fillna(0) + df["TINC_BOND"].fillna(0)) * 12
549609
df["dividend_income"] = df["TINC_STMF"].fillna(0) * 12
550610
df["rental_income"] = df["TINC_RENT"].fillna(0) * 12

tests/unit/datasets/test_sipp_ssi_disability.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,37 @@ def test_build_ssi_disability_training_frame_screens_financially():
7171
)
7272

7373

74+
def test_build_ssi_disability_training_frame_screens_nonblind_sga():
    """A record with 1,600 in monthly job earnings is not a training candidate."""
    record = _base_sipp_frame().iloc[[2]].assign(
        TPTOTINC=1_600.0,
        TJB1_MSUM=1_600.0,
    )

    built = build_ssi_disability_training_frame(record)

    is_candidate = built["ssi_disability_training_candidate"].iloc[0]
    assert not is_candidate
82+
83+
84+
def test_build_ssi_disability_training_frame_does_not_sga_screen_blind_records():
    """A record flagged ``ESEEING == 1`` stays a candidate at 1,600 job earnings."""
    record = _base_sipp_frame().iloc[[2]].assign(
        TPTOTINC=1_600.0,
        TJB1_MSUM=1_600.0,
        ESEEING=1,
    )

    built = build_ssi_disability_training_frame(record)

    is_candidate = built["ssi_disability_training_candidate"].iloc[0]
    assert is_candidate
93+
94+
95+
def test_build_ssi_disability_training_frame_uses_countable_income_threshold():
    """A record with 1,500 total income and no job earnings is screened out."""
    record = _base_sipp_frame().iloc[[2]].assign(
        TPTOTINC=1_500.0,
        TJB1_MSUM=0.0,
    )

    built = build_ssi_disability_training_frame(record)

    is_candidate = built["ssi_disability_training_candidate"].iloc[0]
    assert not is_candidate
103+
104+
74105
def test_build_ssi_disability_training_frame_uses_all_disability_amounts():
75106
frame = _base_sipp_frame().iloc[[2]].copy()
76107
frame["TDIS6AMT"] = 100
@@ -81,7 +112,7 @@ def test_build_ssi_disability_training_frame_uses_all_disability_amounts():
81112

82113

83114
def test_ssi_disability_training_usecols_include_label_and_income_columns():
84-
assert {"TPTOTINC", "RSSI_YRYN"} <= set(SSI_DISABILITY_COLUMNS)
115+
assert {"TPTOTINC", "TJB1_MSUM", "RSSI_YRYN"} <= set(SSI_DISABILITY_COLUMNS)
85116
assert {"ASSI_YRYN", "ASSI_BRSN"} <= set(SSI_DISABILITY_COLUMNS)
86117
assert {
87118
"ESELFCARE",
@@ -101,9 +132,9 @@ def test_ssi_disability_predictors_use_six_comparable_difficulty_items():
101132

102133

103134
def test_ssi_disability_model_cache_version_tracks_predictor_schema():
104-
assert SSI_DISABILITY_MODEL_VERSION == 6
135+
assert SSI_DISABILITY_MODEL_VERSION == 7
105136
assert _ssi_disability_model_path(2024).name == (
106-
"ssi_disability_criteria_v6_2024.pkl"
137+
"ssi_disability_criteria_v7_2024.pkl"
107138
)
108139

109140

0 commit comments

Comments
 (0)