|
217 | 217 | } |
218 | 218 | ) |
219 | 219 |
|
| 220 | +# Refresh categorical/status fields against the PUF income surface, but never |
| 221 | +# overwrite amount fields here. PUF and CPS income amounts must come from donor |
| 222 | +# imputation/calibration, not from post-hoc bucket or nearest-neighbor surgery. |
220 | 223 | PUF_SUPPORT_CLONE_CPS_REFRESH_VARIABLES: tuple[str, ...] = ( |
221 | 224 | "is_male", |
222 | 225 | "cps_race", |
|
231 | 234 | "difficulty_doing_errands", |
232 | 235 | "difficulty_remembering_or_making_decisions", |
233 | 236 | "meets_ssi_disability_criteria", |
234 | | - "social_security_retirement", |
235 | | - "social_security_disability", |
236 | | - "social_security_survivors", |
237 | | - "social_security_dependents", |
238 | | - "disability_benefits", |
239 | | - "workers_compensation", |
240 | | - "unemployment_compensation", |
241 | | - "child_support_received", |
242 | | - "veterans_benefits", |
243 | | - "educational_assistance", |
244 | | - "financial_assistance", |
245 | | - "survivor_benefits", |
246 | | - "strike_benefits", |
247 | 237 | "receives_wic", |
248 | 238 | "receives_housing_assistance", |
249 | | - "spm_unit_energy_subsidy", |
250 | | - "spm_unit_pre_subsidy_childcare_expenses", |
251 | | - "employer_sponsored_insurance_premiums", |
252 | | - "health_insurance_premiums_without_medicare_part_b", |
253 | | - "other_health_insurance_premiums", |
254 | | - "over_the_counter_health_expenses", |
255 | | - "other_medical_expenses", |
256 | | - "child_support_expense", |
257 | | - "weekly_hours_worked", |
258 | | - "hours_worked", |
259 | | - "hours_worked_last_week", |
260 | | - "weekly_hours_worked_before_lsr", |
261 | | - "weeks_worked", |
262 | | - "hourly_wage", |
263 | 239 | "is_paid_hourly", |
264 | 240 | "is_union_member_or_covered", |
265 | | - "employment_income_last_year", |
266 | | - "self_employment_income_last_year", |
267 | | - "taxable_401k_distributions", |
268 | | - "tax_exempt_401k_distributions", |
269 | | - "taxable_403b_distributions", |
270 | | - "tax_exempt_403b_distributions", |
271 | | - "keogh_distributions", |
272 | | - "taxable_sep_distributions", |
273 | | - "tax_exempt_sep_distributions", |
274 | | - "traditional_401k_contributions_desired", |
275 | | - "roth_401k_contributions_desired", |
276 | | - "traditional_ira_contributions_desired", |
277 | | - "roth_ira_contributions_desired", |
278 | | - "self_employed_pension_contributions_desired", |
279 | | -) |
280 | | - |
281 | | -PUF_SUPPORT_CLONE_TOP_TAIL_ROUGH_AGI_CAP = 78_999_999.0 |
282 | | -PUF_SUPPORT_CLONE_TOP_TAIL_ROUGH_AGI_VARIABLES: tuple[str, ...] = ( |
283 | | - "employment_income", |
284 | | - "employment_income_before_lsr", |
285 | | - "tip_income", |
286 | | - "fsla_overtime_premium", |
287 | | - "self_employment_income", |
288 | | - "self_employment_income_before_lsr", |
289 | | - "taxable_interest_income", |
290 | | - "tax_exempt_interest_income", |
291 | | - "capital_gains", |
292 | | - "long_term_capital_gains_before_response", |
293 | | - "long_term_capital_gains", |
294 | | - "short_term_capital_gains", |
295 | | - "non_sch_d_capital_gains", |
296 | | - "dividend_income", |
297 | | - "ordinary_dividend_income", |
298 | | - "qualified_dividend_income", |
299 | | - "non_qualified_dividend_income", |
300 | | - "partnership_s_corp_income", |
301 | | - "rental_income", |
302 | | - "farm_income", |
303 | | - "farm_operations_income", |
304 | | - "farm_rent_income", |
305 | | - "ira_distributions", |
306 | | - "taxable_pension_income", |
307 | | - "taxable_private_pension_income", |
308 | | - "taxable_ira_distributions", |
309 | | - "taxable_401k_distributions", |
310 | | - "taxable_403b_distributions", |
311 | | - "taxable_sep_distributions", |
312 | | - "total_pension_income", |
313 | | - "taxable_social_security", |
314 | | - "social_security", |
315 | | - "social_security_retirement", |
316 | | - "social_security_disability", |
317 | | - "social_security_survivors", |
318 | | - "social_security_dependents", |
319 | | -) |
320 | | -PUF_SUPPORT_CLONE_TOP_TAIL_SCALE_VARIABLES: tuple[str, ...] = ( |
321 | | - "capital_gains", |
322 | | - "long_term_capital_gains_before_response", |
323 | | - "long_term_capital_gains", |
324 | | - "short_term_capital_gains", |
325 | | - "non_sch_d_capital_gains", |
326 | | - "partnership_s_corp_income", |
327 | | - "dividend_income", |
328 | | - "qualified_dividend_income", |
329 | | - "non_qualified_dividend_income", |
330 | | - "ordinary_dividend_income", |
331 | | - "taxable_interest_income", |
332 | | - "tax_exempt_interest_income", |
333 | 241 | ) |
334 | 242 |
|
335 | 243 | DEFAULT_ACA_TAKEUP_RATE = 0.672 |
@@ -2143,15 +2051,6 @@ class USMicroplexBuildConfig: |
2143 | 2051 | puf_support_clone_cps_refresh_condition_variables: tuple[str, ...] = ( |
2144 | 2052 | PUF_SUPPORT_CLONE_CPS_REFRESH_CONDITION_VARIABLES |
2145 | 2053 | ) |
2146 | | - puf_support_clone_top_tail_rough_agi_cap: float | None = ( |
2147 | | - PUF_SUPPORT_CLONE_TOP_TAIL_ROUGH_AGI_CAP |
2148 | | - ) |
2149 | | - puf_support_clone_top_tail_rough_agi_variables: tuple[str, ...] = ( |
2150 | | - PUF_SUPPORT_CLONE_TOP_TAIL_ROUGH_AGI_VARIABLES |
2151 | | - ) |
2152 | | - puf_support_clone_top_tail_scale_variables: tuple[str, ...] = ( |
2153 | | - PUF_SUPPORT_CLONE_TOP_TAIL_SCALE_VARIABLES |
2154 | | - ) |
2155 | 2054 | dependent_tax_leaf_soft_cap_multiplier: float | None = None |
2156 | 2055 | dependent_tax_leaf_soft_cap_base_variables: tuple[str, ...] = ( |
2157 | 2056 | "employment_income", |
@@ -5791,194 +5690,6 @@ def _reconcile_puf_support_clone_social_security( |
5791 | 5690 | ] = total.loc[fallback_mask & age.lt(62)] |
5792 | 5691 | return subcomponents |
5793 | 5692 |
|
5794 | | - def _puf_support_clone_top_tail_rough_agi( |
5795 | | - self, |
5796 | | - clone: pd.DataFrame, |
5797 | | - ) -> tuple[pd.Series, list[str]]: |
5798 | | - """Compute a nonredundant rough AGI proxy for PUF clone top-tail checks.""" |
5799 | | - |
5800 | | - configured = set(self.config.puf_support_clone_top_tail_rough_agi_variables) |
5801 | | - |
5802 | | - def numeric(variable: str) -> pd.Series: |
5803 | | - return ( |
5804 | | - pd.to_numeric(clone[variable], errors="coerce") |
5805 | | - .replace([np.inf, -np.inf], np.nan) |
5806 | | - .fillna(0.0) |
5807 | | - ) |
5808 | | - |
5809 | | - components: list[pd.Series] = [] |
5810 | | - variables: list[str] = [] |
5811 | | - |
5812 | | - def add(variable: str) -> bool: |
5813 | | - if variable not in configured or variable not in clone.columns: |
5814 | | - return False |
5815 | | - components.append(numeric(variable)) |
5816 | | - variables.append(variable) |
5817 | | - return True |
5818 | | - |
5819 | | - def add_first(*variables: str) -> bool: |
5820 | | - return any(add(variable) for variable in variables) |
5821 | | - |
5822 | | - def add_all(*variables: str) -> bool: |
5823 | | - added = False |
5824 | | - for variable in variables: |
5825 | | - added = add(variable) or added |
5826 | | - return added |
5827 | | - |
5828 | | - add_first("employment_income", "employment_income_before_lsr") |
5829 | | - if "employment_income" not in variables: |
5830 | | - add_all("tip_income", "fsla_overtime_premium") |
5831 | | - |
5832 | | - add_first("self_employment_income", "self_employment_income_before_lsr") |
5833 | | - |
5834 | | - for variable in ( |
5835 | | - "taxable_interest_income", |
5836 | | - "tax_exempt_interest_income", |
5837 | | - "partnership_s_corp_income", |
5838 | | - "rental_income", |
5839 | | - ): |
5840 | | - add(variable) |
5841 | | - |
5842 | | - if not add("farm_income"): |
5843 | | - add_all("farm_operations_income", "farm_rent_income") |
5844 | | - |
5845 | | - added_capital_gain_components = False |
5846 | | - if add("long_term_capital_gains_before_response"): |
5847 | | - added_capital_gain_components = True |
5848 | | - elif add("long_term_capital_gains"): |
5849 | | - added_capital_gain_components = True |
5850 | | - if add("short_term_capital_gains"): |
5851 | | - added_capital_gain_components = True |
5852 | | - if not added_capital_gain_components: |
5853 | | - add("capital_gains") |
5854 | | - add("non_sch_d_capital_gains") |
5855 | | - |
5856 | | - if not add("dividend_income") and not add("ordinary_dividend_income"): |
5857 | | - add("qualified_dividend_income") |
5858 | | - add("non_qualified_dividend_income") |
5859 | | - |
5860 | | - if not add("taxable_pension_income") and not add("total_pension_income"): |
5861 | | - add_all( |
5862 | | - "ira_distributions", |
5863 | | - "taxable_private_pension_income", |
5864 | | - "taxable_ira_distributions", |
5865 | | - "taxable_401k_distributions", |
5866 | | - "taxable_403b_distributions", |
5867 | | - "taxable_sep_distributions", |
5868 | | - ) |
5869 | | - if not add("taxable_social_security") and not add("social_security"): |
5870 | | - add_all( |
5871 | | - "social_security_retirement", |
5872 | | - "social_security_disability", |
5873 | | - "social_security_survivors", |
5874 | | - "social_security_dependents", |
5875 | | - ) |
5876 | | - |
5877 | | - if not components: |
5878 | | - return pd.Series(0.0, index=clone.index, dtype=float), [] |
5879 | | - return sum(components), variables |
5880 | | - |
5881 | | - def _apply_puf_support_clone_top_tail_guard( |
5882 | | - self, |
5883 | | - clone: pd.DataFrame, |
5884 | | - *, |
5885 | | - integrated_variables: Iterable[str], |
5886 | | - ) -> tuple[pd.DataFrame, dict[str, Any]]: |
5887 | | - """Avoid arbitrary state placement of unsupported PUF top-tail clones. |
5888 | | -
|
5889 | | - PUF has no state geography, so the CPS support clone inherits state from |
5890 | | - its scaffold row. Until the top tail gets state-aware support records, |
5891 | | - do not let a single imputed clone enter the open-ended SOI AGI count bin |
5892 | | - and then receive a large calibrated state weight. |
5893 | | - """ |
5894 | | - |
5895 | | - cap = self.config.puf_support_clone_top_tail_rough_agi_cap |
5896 | | - summary: dict[str, Any] = { |
5897 | | - "enabled": cap is not None, |
5898 | | - "cap": float(cap) if cap is not None else None, |
5899 | | - "affected_rows": 0, |
5900 | | - "rough_agi_variables": [], |
5901 | | - "scaled_variables": [], |
5902 | | - "scale_basis_variables": [], |
5903 | | - "max_rough_agi_before": None, |
5904 | | - "max_rough_agi_after": None, |
5905 | | - } |
5906 | | - if cap is None or cap <= 0.0 or clone.empty: |
5907 | | - return clone, summary |
5908 | | - |
5909 | | - rough_agi, rough_agi_variables = self._puf_support_clone_top_tail_rough_agi( |
5910 | | - clone |
5911 | | - ) |
5912 | | - if not rough_agi_variables: |
5913 | | - return clone, summary |
5914 | | - |
5915 | | - summary["rough_agi_variables"] = rough_agi_variables |
5916 | | - summary["max_rough_agi_before"] = float(rough_agi.max()) |
5917 | | - |
5918 | | - over_cap = rough_agi > float(cap) |
5919 | | - if not bool(over_cap.any()): |
5920 | | - summary["max_rough_agi_after"] = summary["max_rough_agi_before"] |
5921 | | - return clone, summary |
5922 | | - |
5923 | | - integrated_set = set(integrated_variables) |
5924 | | - |
5925 | | - def is_integrated_or_export_alias(variable: str) -> bool: |
5926 | | - if variable in integrated_set: |
5927 | | - return True |
5928 | | - return ( |
5929 | | - variable == "long_term_capital_gains_before_response" |
5930 | | - and "long_term_capital_gains" in integrated_set |
5931 | | - ) |
5932 | | - |
5933 | | - scale_variables = [ |
5934 | | - variable |
5935 | | - for variable in self.config.puf_support_clone_top_tail_scale_variables |
5936 | | - if variable in clone.columns and is_integrated_or_export_alias(variable) |
5937 | | - ] |
5938 | | - if not scale_variables: |
5939 | | - summary["max_rough_agi_after"] = summary["max_rough_agi_before"] |
5940 | | - return clone, summary |
5941 | | - scale_basis_variables = [ |
5942 | | - variable for variable in scale_variables if variable in rough_agi_variables |
5943 | | - ] |
5944 | | - if not scale_basis_variables: |
5945 | | - summary["max_rough_agi_after"] = summary["max_rough_agi_before"] |
5946 | | - return clone, summary |
5947 | | - |
5948 | | - scale_frame = pd.DataFrame( |
5949 | | - { |
5950 | | - variable: pd.to_numeric(clone[variable], errors="coerce") |
5951 | | - .replace([np.inf, -np.inf], np.nan) |
5952 | | - .fillna(0.0) |
5953 | | - .clip(lower=0.0) |
5954 | | - for variable in scale_basis_variables |
5955 | | - }, |
5956 | | - index=clone.index, |
5957 | | - ) |
5958 | | - scalable = scale_frame.sum(axis=1) |
5959 | | - nonscalable = rough_agi - scalable |
5960 | | - desired_scalable = (float(cap) - nonscalable).clip(lower=0.0) |
5961 | | - eligible = over_cap & scalable.gt(0.0) |
5962 | | - if not bool(eligible.any()): |
5963 | | - summary["max_rough_agi_after"] = summary["max_rough_agi_before"] |
5964 | | - return clone, summary |
5965 | | - |
5966 | | - scale = (desired_scalable[eligible] / scalable[eligible]).clip( |
5967 | | - lower=0.0, |
5968 | | - upper=1.0, |
5969 | | - ) |
5970 | | - guarded = clone.copy() |
5971 | | - for variable in scale_variables: |
5972 | | - values = pd.to_numeric(guarded.loc[eligible, variable], errors="coerce") |
5973 | | - guarded.loc[eligible, variable] = values.fillna(0.0).clip(lower=0.0) * scale |
5974 | | - |
5975 | | - guarded_rough_agi, _ = self._puf_support_clone_top_tail_rough_agi(guarded) |
5976 | | - summary["affected_rows"] = int(eligible.sum()) |
5977 | | - summary["scaled_variables"] = scale_variables |
5978 | | - summary["scale_basis_variables"] = scale_basis_variables |
5979 | | - summary["max_rough_agi_after"] = float(guarded_rough_agi.max()) |
5980 | | - return guarded, summary |
5981 | | - |
5982 | 5693 | def _finalize_puf_support_clone_frame( |
5983 | 5694 | self, |
5984 | 5695 | *, |
@@ -6013,10 +5724,6 @@ def _finalize_puf_support_clone_frame( |
6013 | 5724 | integrated_variables=integrated_variables, |
6014 | 5725 | preclone_columns=preclone_columns, |
6015 | 5726 | ) |
6016 | | - clone, top_tail_guard_summary = self._apply_puf_support_clone_top_tail_guard( |
6017 | | - clone, |
6018 | | - integrated_variables=integrated_variables, |
6019 | | - ) |
6020 | 5727 |
|
6021 | 5728 | generated_entity_id_columns = sorted( |
6022 | 5729 | set(ENTITY_ID_COLUMNS.values()) & (set(clone.columns) - preclone_columns) |
@@ -6075,7 +5782,6 @@ def _finalize_puf_support_clone_frame( |
6075 | 5782 | "donor_only_variables": donor_only_variables, |
6076 | 5783 | "both_halves_override_variables": sorted(both_halves_override), |
6077 | 5784 | "cps_only_refresh": cps_refresh_summary, |
6078 | | - "top_tail_guard": top_tail_guard_summary, |
6079 | 5785 | "dropped_generated_entity_id_columns": generated_entity_id_columns, |
6080 | 5786 | "variable_surface": { |
6081 | 5787 | "ecps_imputed_variables": list(PUF_SUPPORT_CLONE_IMPUTED_VARIABLES), |
|
0 commit comments