4848
4949SSI_DISABILITY_CRITERIA_VARIABLE = "meets_ssi_disability_criteria"
5050SSI_DISABILITY_MODEL_VARIABLE = SSI_DISABILITY_CRITERIA_VARIABLE
51- SSI_DISABILITY_MODEL_VERSION = 6
51+ SSI_DISABILITY_MODEL_VERSION = 7
5252SSI_DISABILITY_EXPORT_VARIABLES = (SSI_DISABILITY_CRITERIA_VARIABLE ,)
5353
5454# These six CPS/SIPP difficulty items are construction-time predictors for the
@@ -459,6 +459,42 @@ def _yes(df: pd.DataFrame, column: str) -> pd.Series:
459459 return values .fillna (0 ).astype (float ).eq (1 )
460460
461461
def _sipp_monthly_earned_income(df: pd.DataFrame) -> pd.Series:
    """Return a per-row monthly earned-income proxy built from SIPP columns.

    Every column named in ``ASSET_JOB_EARNINGS_COLUMNS`` that exists in
    ``df`` is summed row-wise, with missing values counted as zero.  When
    none of those columns is present, the ``TPTOTINC`` column (missing
    values replaced by zero) is returned instead.
    """
    present_earnings_columns = [
        name for name in ASSET_JOB_EARNINGS_COLUMNS if name in df
    ]
    if not present_earnings_columns:
        # No job-earnings detail available: fall back to TPTOTINC.
        return df["TPTOTINC"].fillna(0)

    job_earnings = df[present_earnings_columns].fillna(0)
    return job_earnings.sum(axis=1)
468+
469+
def _sipp_monthly_unearned_income(
    df: pd.DataFrame, monthly_earned_income: pd.Series
) -> pd.Series:
    """Return a per-row monthly unearned-income proxy.

    The proxy is ``TPTOTINC`` (missing values counted as zero) minus
    ``monthly_earned_income``, floored at zero so that earnings exceeding
    the reported total never produce a negative amount.
    """
    total_income = df["TPTOTINC"].fillna(0)
    residual = total_income - monthly_earned_income
    return residual.clip(lower=0)
475+
476+
def _approximate_monthly_ssi_countable_income(
    monthly_earned_income: pd.Series,
    monthly_unearned_income: pd.Series,
    *,
    general_exclusion: float,
    earned_exclusion: float,
    earned_share_excluded: float,
) -> pd.Series:
    """Compute approximate monthly countable income after income exclusions.

    Args:
        monthly_earned_income: Per-row monthly earned income.
        monthly_unearned_income: Per-row monthly unearned income.
        general_exclusion: Flat amount applied to unearned income first;
            any part not used there carries over to earned income.
        earned_exclusion: Flat amount subtracted from earned income.
        earned_share_excluded: Fraction of the remaining earned income
            that is excluded (e.g. ``0.5`` leaves half countable).

    Returns:
        Countable unearned income plus countable earned income, per row.
    """
    # The general exclusion is used against unearned income up to the
    # amount of unearned income available.
    general_used = np.minimum(general_exclusion, monthly_unearned_income)
    unearned_countable = monthly_unearned_income - general_used
    general_unused = general_exclusion - general_used

    # Earned income absorbs the flat earned exclusion and whatever part of
    # the general exclusion is left over; the result cannot go below zero.
    earned_net_of_flat = (
        monthly_earned_income.sub(earned_exclusion)
        .sub(general_unused)
        .clip(lower=0)
    )
    earned_countable = earned_net_of_flat.mul(1 - earned_share_excluded)

    return unearned_countable + earned_countable
496+
497+
462498def _add_ssi_disability_difficulty_predictors (df : pd .DataFrame ) -> None :
463499 for predictor , source_column in SIPP_SSI_DISABILITY_DIFFICULTY_COLUMNS .items ():
464500 df [predictor ] = _yes (df , source_column )
@@ -491,25 +527,35 @@ def _observed_ssi_disability_label_mask(
491527def _ssi_financial_candidate_mask (
492528 df : pd .DataFrame , time_period : int = 2024
493529) -> pd .Series :
494- """Approximate non-disability SSI financial eligibility in SIPP.
530+ """Approximate non-disability SSI screening eligibility in SIPP.
495531
496532 This is only a training-frame screen. It avoids treating people whose
497- resources or income make SSI receipt structurally unlikely as clean
498- non-disabled labels.
533+ resources, countable income, or SGA-level earnings make SSI receipt
534+ structurally unlikely as clean non-disabled labels.
499535 """
500536 try :
501537 from policyengine_us import CountryTaxBenefitSystem
502538
503- p = CountryTaxBenefitSystem ().parameters (f"{ time_period } -01-01" ).gov .ssa .ssi
539+ parameters = CountryTaxBenefitSystem ().parameters (f"{ time_period } -01-01" )
540+ p = parameters .gov .ssa .ssi
504541 individual_resource_limit = float (p .eligibility .resources .limit .individual )
505542 couple_resource_limit = float (p .eligibility .resources .limit .couple )
506543 individual_fbr = float (p .amount .individual )
507544 couple_fbr = float (p .amount .couple )
545+ income_exclusions = p .income .exclusions
546+ general_exclusion = float (income_exclusions .general )
547+ earned_exclusion = float (income_exclusions .earned )
548+ earned_share_excluded = float (income_exclusions .earned_share )
549+ non_blind_sga = float (parameters .gov .ssa .sga .non_blind )
508550 except Exception :
509551 individual_resource_limit = 2_000.0
510552 couple_resource_limit = 3_000.0
511553 individual_fbr = 943.0
512554 couple_fbr = 1_415.0
555+ general_exclusion = 20.0
556+ earned_exclusion = 65.0
557+ earned_share_excluded = 0.5
558+ non_blind_sga = 1_550.0
513559
514560 resource_limit = np .where (
515561 df ["is_married" ].astype (bool ),
@@ -526,9 +572,23 @@ def _ssi_financial_candidate_mask(
526572 + df ["stock_assets" ].fillna (0 )
527573 + df ["bond_assets" ].fillna (0 )
528574 )
529- monthly_income = df ["TPTOTINC" ].fillna (0 )
530- return (liquid_resources <= resource_limit ) & (
531- monthly_income <= monthly_income_limit * 2
575+ monthly_earned_income = _sipp_monthly_earned_income (df )
576+ monthly_unearned_income = _sipp_monthly_unearned_income (df , monthly_earned_income )
577+ monthly_countable_income = _approximate_monthly_ssi_countable_income (
578+ monthly_earned_income ,
579+ monthly_unearned_income ,
580+ general_exclusion = general_exclusion ,
581+ earned_exclusion = earned_exclusion ,
582+ earned_share_excluded = earned_share_excluded ,
583+ )
584+ difficulty_seeing = df .get ("difficulty_seeing" , _yes (df , "ESEEING" ))
585+ is_blind = pd .Series (difficulty_seeing , index = df .index ).fillna (False ).astype (bool )
586+ passes_sga_gate = is_blind | monthly_earned_income .le (non_blind_sga )
587+
588+ return (
589+ (liquid_resources <= resource_limit )
590+ & monthly_countable_income .le (monthly_income_limit )
591+ & passes_sga_gate
532592 )
533593
534594
@@ -544,7 +604,7 @@ def build_ssi_disability_training_frame(
544604 df ["age" ] = df .TAGE
545605 df ["is_female" ] = df .ESEX == 2
546606 df ["is_married" ] = df .EMS == 1
547- df ["employment_income" ] = df . TPTOTINC . fillna ( 0 ) * 12
607+ df ["employment_income" ] = _sipp_monthly_earned_income ( df ) * 12
548608 df ["interest_income" ] = (df ["TINC_BANK" ].fillna (0 ) + df ["TINC_BOND" ].fillna (0 )) * 12
549609 df ["dividend_income" ] = df ["TINC_STMF" ].fillna (0 ) * 12
550610 df ["rental_income" ] = df ["TINC_RENT" ].fillna (0 ) * 12
0 commit comments