@@ -76,7 +76,6 @@ def _supports_structural_mortgage_inputs() -> bool:
7676 "spm_unit_payroll_tax_reported" ,
7777 "spm_unit_federal_tax_reported" ,
7878 "spm_unit_state_tax_reported" ,
79- "spm_unit_capped_work_childcare_expenses" ,
8079 "spm_unit_spm_threshold" ,
8180 "spm_unit_net_income_reported" ,
8281 "spm_unit_pre_subsidy_childcare_expenses" ,
@@ -326,6 +325,125 @@ def reconcile_ss_subcomponents(predictions, total_ss):
326325}
327326
328327
def _lower_earner_cap_by_unit(clone_person_data: pd.DataFrame) -> pd.Series:
    """Return the childcare earnings cap per SPM unit, indexed by unit id.

    Only rows flagged ``is_parent_proxy`` are considered. Within each SPM
    unit the two oldest flagged persons are selected (ties broken by row
    order); the cap is the smaller of their non-negative earnings, or the
    single person's earnings when only one is flagged. Units without any
    ranked parent proxy are absent from the result.
    """
    is_parent = clone_person_data["is_parent_proxy"].astype(bool).to_numpy()
    parents = clone_person_data.loc[is_parent, ["spm_unit_id", "age", "earnings"]]
    if parents.empty:
        return pd.Series(dtype=float)

    unit_ids = parents["spm_unit_id"].to_numpy()
    # Negative earnings (e.g. self-employment losses) cannot fund childcare.
    earnings = parents["earnings"].clip(lower=0.0).to_numpy(dtype=float)
    # method="first" yields distinct ranks 1, 2, ... inside every unit, so
    # each unit has exactly one rank-1 row and at most one rank-2 row.
    age_rank = (
        parents.groupby("spm_unit_id")["age"]
        .rank(method="first", ascending=False)
        .to_numpy()
    )

    is_oldest = age_rank == 1
    is_second = age_rank == 2
    oldest = pd.Series(earnings[is_oldest], index=unit_ids[is_oldest])
    second = pd.Series(earnings[is_second], index=unit_ids[is_second])

    # Track membership explicitly so that a unit with no second parent is
    # not confused with a second parent whose earnings are NaN.
    has_second = oldest.index.isin(second.index)
    oldest_values = oldest.to_numpy()
    second_values = second.reindex(oldest.index).to_numpy()
    cap = np.where(
        has_second,
        np.minimum(oldest_values, second_values),
        oldest_values,
    )
    return pd.Series(cap, index=oldest.index, dtype=float)


def derive_clone_capped_childcare_expenses(
    donor_pre_subsidy: np.ndarray,
    donor_capped: np.ndarray,
    clone_pre_subsidy: np.ndarray,
    clone_person_data: pd.DataFrame,
    clone_spm_unit_ids: np.ndarray,
) -> np.ndarray:
    """Derive clone-half capped childcare from clone inputs.

    The CPS provides both pre-subsidy childcare and the SPM-specific
    capped childcare deduction. For the clone half, we impute only the
    pre-subsidy amount, then deterministically rebuild the capped amount
    instead of letting a second QRF predict it independently.

    We preserve the donor's observed capping share while also respecting
    the clone's own earnings cap. This keeps the clone-half value
    consistent with pre-subsidy childcare and avoids impossible outputs
    such as capped childcare exceeding pre-subsidy childcare.

    Args:
        donor_pre_subsidy: Pre-subsidy childcare of each donor SPM unit.
        donor_capped: Capped childcare of each donor SPM unit.
        clone_pre_subsidy: Imputed pre-subsidy childcare of each clone SPM
            unit, positionally aligned with the donor arrays.
        clone_person_data: Person-level frame with columns ``spm_unit_id``,
            ``age``, ``is_parent_proxy`` and ``earnings``. An empty frame
            yields an earnings cap of zero for every unit.
        clone_spm_unit_ids: SPM unit id of each clone unit, in the same
            order as ``clone_pre_subsidy``.

    Returns:
        Float array with one capped childcare amount per clone SPM unit:
        ``min(max(clone_pre_subsidy, 0) * donor_share, earnings_cap)``.

    Raises:
        ValueError: If the donor and clone arrays do not all have the same
            shape.
    """

    donor_pre_subsidy = np.asarray(donor_pre_subsidy, dtype=float)
    donor_capped = np.asarray(donor_capped, dtype=float)
    clone_pre_subsidy = np.asarray(clone_pre_subsidy, dtype=float)
    clone_spm_unit_ids = np.asarray(clone_spm_unit_ids)

    # Fail loudly instead of letting NumPy broadcast a length-1 array or
    # raise an opaque shape error further down.
    shapes = {
        "donor_pre_subsidy": donor_pre_subsidy.shape,
        "donor_capped": donor_capped.shape,
        "clone_pre_subsidy": clone_pre_subsidy.shape,
        "clone_spm_unit_ids": clone_spm_unit_ids.shape,
    }
    if len(set(shapes.values())) != 1:
        raise ValueError(
            "Donor and clone childcare arrays must be aligned one-to-one, "
            f"got shapes {shapes}"
        )

    # Share of pre-subsidy childcare that survived the cap for each donor.
    # NOTE(review): donors with no pre-subsidy childcare get a share of 0,
    # so a clone imputed a positive amount from such a donor ends up with
    # zero capped childcare -- confirm this is the intended fallback.
    donor_cap_share = np.divide(
        donor_capped,
        donor_pre_subsidy,
        out=np.zeros_like(donor_capped, dtype=float),
        where=donor_pre_subsidy > 0,
    )
    donor_cap_share = np.clip(donor_cap_share, 0.0, 1.0)
    capped_from_share = np.maximum(clone_pre_subsidy, 0.0) * donor_cap_share

    if clone_person_data.empty:
        earnings_cap = np.zeros(len(clone_spm_unit_ids), dtype=float)
    else:
        # Units without a ranked parent proxy fall back to a cap of zero.
        earnings_cap = (
            _lower_earner_cap_by_unit(clone_person_data)
            .reindex(clone_spm_unit_ids, fill_value=0.0)
            .to_numpy(dtype=float)
        )

    return np.minimum(capped_from_share, earnings_cap)
391+
392+
def _rebuild_clone_capped_childcare_expenses(
    data: dict,
    time_period: int,
    cps_sim,
) -> np.ndarray:
    """Recompute the clone half of capped childcare after stage-2 imputation.

    Every array in ``data`` is split at its midpoint: the first half is
    used as the donor side and the second half as the clone side. Age and
    tax-unit head/spouse flags come from ``cps_sim`` and are paired
    positionally with the second-half person arrays (presumably the clone
    persons mirror the simulated persons in order -- verify against the
    caller).

    Args:
        data: Mapping of variable name to ``{time_period: array}``.
        time_period: Period key used to read each variable.
        cps_sim: Object exposing ``calculate_dataframe``.

    Returns:
        The array produced by ``derive_clone_capped_childcare_expenses``
        for the clone-half SPM units.

    Raises:
        ValueError: If the role frame returned by ``cps_sim`` does not have
            exactly half as many rows as ``person_id``.
    """

    def values_of(variable):
        return data[variable][time_period]

    person_split = len(values_of("person_id")) // 2
    unit_split = len(values_of("spm_unit_id")) // 2

    roles = cps_sim.calculate_dataframe(
        ["age", "is_tax_unit_head", "is_tax_unit_spouse"]
    )
    role_rows = len(roles)
    if role_rows != person_split:
        raise ValueError(
            "Unexpected person role frame length while rebuilding clone childcare "
            f"expenses: got {role_rows}, expected {person_split}"
        )

    # Person-level inputs for the clone half; tax-unit heads and spouses
    # stand in for parents.
    clone_unit_of_person = values_of("person_spm_unit_id")[person_split:]
    ages = roles["age"].values
    head_or_spouse = (
        roles["is_tax_unit_head"].values | roles["is_tax_unit_spouse"].values
    )
    wages = values_of("employment_income")[person_split:]
    self_employment = values_of("self_employment_income")[person_split:]
    clone_people = pd.DataFrame(
        {
            "spm_unit_id": clone_unit_of_person,
            "age": ages,
            "is_parent_proxy": head_or_spouse,
            "earnings": wages + self_employment,
        }
    )

    # SPM-unit-level inputs: first half feeds the donor side, second half
    # the clone side.
    pre_subsidy = values_of("spm_unit_pre_subsidy_childcare_expenses")
    donor_side_pre_subsidy = pre_subsidy[:unit_split]
    donor_side_capped = values_of("spm_unit_capped_work_childcare_expenses")[
        :unit_split
    ]
    clone_side_pre_subsidy = values_of("spm_unit_pre_subsidy_childcare_expenses")[
        unit_split:
    ]
    clone_unit_ids = values_of("spm_unit_id")[unit_split:]

    return derive_clone_capped_childcare_expenses(
        donor_pre_subsidy=donor_side_pre_subsidy,
        donor_capped=donor_side_capped,
        clone_pre_subsidy=clone_side_pre_subsidy,
        clone_person_data=clone_people,
        clone_spm_unit_ids=clone_unit_ids,
    )
445+
446+
329447def _apply_post_processing (predictions , X_test , time_period , data ):
330448 """Apply retirement constraints and SS reconciliation."""
331449 ret_cols = [c for c in predictions .columns if c in _RETIREMENT_VARS ]
@@ -430,6 +548,24 @@ def _splice_cps_only_predictions(
430548 new_values = np .concatenate ([cps_half , pred_values ])
431549 data [var ] = {time_period : new_values }
432550
551+ if (
552+ "spm_unit_capped_work_childcare_expenses" in data
553+ and "spm_unit_pre_subsidy_childcare_expenses" in data
554+ ):
555+ n_half = entity_half_lengths .get (
556+ "spm_unit" ,
557+ len (data ["spm_unit_capped_work_childcare_expenses" ][time_period ]) // 2 ,
558+ )
559+ cps_half = data ["spm_unit_capped_work_childcare_expenses" ][time_period ][:n_half ]
560+ clone_half = _rebuild_clone_capped_childcare_expenses (
561+ data = data ,
562+ time_period = time_period ,
563+ cps_sim = cps_sim ,
564+ )
565+ data ["spm_unit_capped_work_childcare_expenses" ] = {
566+ time_period : np .concatenate ([cps_half , clone_half ])
567+ }
568+
433569 del cps_sim
434570 return data
435571
0 commit comments