@@ -227,10 +227,14 @@ def build_forbes_top_tail_artifact(
227227 target_n = target_n ,
228228 scf_donors = scf_donors ,
229229 )
230+ if selected_forbes .empty :
231+ raise ValueError ("Forbes backbone produced no eligible units." )
230232 if len (selected_forbes ) < target_n :
231- raise ValueError (
232- "Forbes backbone produced only "
233- f"{ len (selected_forbes )} eligible units for target { target_n } ."
233+ logger .warning (
234+ "Forbes backbone produced %s eligible units for target %s; "
235+ "scaling replicate weights to the aggregate-row population." ,
236+ len (selected_forbes ),
237+ target_n ,
234238 )
235239
236240 forbes_draws = expand_forbes_replicates (
@@ -265,16 +269,16 @@ def build_forbes_top_tail_artifact(
265269 next_recid + len (scf_draws ),
266270 dtype = int ,
267271 )
268- synthetic ["S006" ] = config .unit_weight_hundredths
272+ synthetic_weight_hundredths = _scaled_replicate_weight_hundredths (
273+ total_units = target_n ,
274+ row_count = len (scf_draws ),
275+ )
276+ synthetic ["S006" ] = synthetic_weight_hundredths
269277
270278 utils ._apply_structural_templates (synthetic , donor_templates )
271279 apply_forbes_structural_overrides (synthetic , scf_draws )
272280
273- synthetic_weights = np .full (
274- len (scf_draws ),
275- 1.0 / config .replicate_count ,
276- dtype = float ,
277- )
281+ synthetic_weights = synthetic_weight_hundredths .astype (float ) / 100
278282 utils ._calibrate_amount_columns (
279283 synthetic = synthetic ,
280284 selected = puf_priors ,
@@ -285,7 +289,7 @@ def build_forbes_top_tail_artifact(
285289 amount_columns = amount_columns ,
286290 synthetic_weights = synthetic_weights ,
287291 )
288- synthetic ["S006" ] = config . unit_weight_hundredths
292+ synthetic ["S006" ] = synthetic_weight_hundredths
289293
290294 artifact = ForbesTopTailArtifact (
291295 source_forbes = source_forbes ,
@@ -343,6 +347,34 @@ def build_forbes_top_tail_diagnostics(
343347 }
344348
345349
350+ def _scaled_replicate_weight_hundredths (
351+ total_units : int ,
352+ row_count : int ,
353+ ) -> np .ndarray :
354+ """Return integer hundredth weights that exactly sum to a unit target."""
355+
356+ if total_units <= 0 :
357+ raise ValueError ("Forbes synthetic target units must be positive." )
358+ if row_count <= 0 :
359+ raise ValueError ("Forbes synthetic row count must be positive." )
360+
361+ total_hundredths = int (total_units * 100 )
362+ base = total_hundredths // row_count
363+ remainder = total_hundredths - base * row_count
364+ if base <= 0 :
365+ raise ValueError (
366+ "Forbes synthetic row count exceeds available hundredth weights."
367+ )
368+
369+ weights = np .full (row_count , base , dtype = int )
370+ if remainder :
371+ remainder_positions = (
372+ np .arange (remainder , dtype = np .int64 ) * row_count // remainder
373+ )
374+ weights [remainder_positions ] += 1
375+ return weights
376+
377+
346378def build_forbes_top_tail_diagnostic_tables (
347379 artifact : ForbesTopTailArtifact ,
348380 row : pd .Series ,
@@ -509,12 +541,15 @@ def validate_forbes_top_tail_artifact(
509541
510542 config .validate ()
511543 expected_units = int (round (pop_weight ))
512- expected_draws = expected_units * config .replicate_count
544+ selected_units = len (artifact .selected_forbes )
545+ expected_draws = selected_units * config .replicate_count
513546
514- if len (artifact .selected_forbes ) != expected_units :
547+ if selected_units <= 0 :
548+ raise ValueError ("Forbes artifact selected no units." )
549+ if selected_units > expected_units :
515550 raise ValueError (
516551 "Forbes artifact selected "
517- f"{ len ( artifact . selected_forbes ) } units for target { expected_units } ."
552+ f"{ selected_units } units for target { expected_units } ."
518553 )
519554 for name , frame in {
520555 "scf_draws" : artifact .scf_draws ,
@@ -534,8 +569,8 @@ def validate_forbes_top_tail_artifact(
534569 "Forbes synthetic weights sum to "
535570 f"{ synthetic_weight } ; expected { expected_units } ."
536571 )
537- if not artifact .synthetic ["S006" ]. eq ( config . unit_weight_hundredths ).all ():
538- raise ValueError ("Forbes synthetic replicate weights are not uniform ." )
572+ if not ( artifact .synthetic ["S006" ] > 0 ).all ():
573+ raise ValueError ("Forbes synthetic replicate weights must be positive ." )
539574
540575 required_columns = {"RECID" , "S006" , "E00100" , * amount_columns }
541576 missing_columns = required_columns .difference (artifact .synthetic .columns )
@@ -554,7 +589,7 @@ def validate_forbes_top_tail_artifact(
554589
555590 weights = artifact .synthetic ["S006" ].to_numpy (dtype = float ) / 100
556591 for column in amount_columns :
557- target_total = pop_weight * float (row .get (column , 0.0 ))
592+ target_total = pop_weight * utils . _finite_amount (row .get (column , 0.0 ))
558593 actual_total = float (
559594 np .dot (artifact .synthetic [column ].to_numpy (dtype = float ), weights )
560595 )
@@ -957,7 +992,7 @@ def score_forbes_selection_with_scf(
957992 for row in prepared_forbes .itertuples (index = False ):
958993 candidates = scf_candidates_for_receiver (scf_donors , row )
959994 probabilities = scf_match_probabilities (candidates , row )
960- agi_values = scf_implied_agi_values (candidates , row )
995+ agi_values = scf_wealth_ratio_agi_values (candidates , row )
961996 tail_probabilities .append (
962997 float (probabilities [agi_values >= FORBES_TOP_TAIL_AGI_THRESHOLD ].sum ())
963998 )
@@ -966,10 +1001,6 @@ def score_forbes_selection_with_scf(
9661001 scored = prepared_forbes .copy ()
9671002 scored ["scf_tail_probability" ] = tail_probabilities
9681003 scored ["scf_expected_agi" ] = expected_agi
969- scored ["estimated_agi" ] = np .maximum (
970- scored ["scf_expected_agi" ].to_numpy (dtype = float ),
971- 1.0 ,
972- )
9731004 return scored
9741005
9751006
@@ -1068,24 +1099,56 @@ def scf_implied_component_values(
10681099 candidates : pd .DataFrame ,
10691100 receiver ,
10701101) -> dict [str , np .ndarray ]:
1071- """Scale SCF donor ratios up to one Forbes receiver's wealth level."""
1102+ """Scale SCF donor income composition to one Forbes receiver's AGI level."""
10721103
1073- networth = float (getattr (receiver , "networth_dollars" , 0.0 ))
1104+ receiver_agi = _receiver_estimated_agi (receiver )
1105+ employment_base = np .maximum (
1106+ 0.0 ,
1107+ candidates ["wageinc" ].to_numpy (dtype = float ),
1108+ )
1109+ capital_gains_base = candidates ["kginc" ].to_numpy (dtype = float )
1110+ interest_dividend_base = np .maximum (
1111+ 0.0 ,
1112+ candidates ["intdivinc" ].to_numpy (dtype = float ),
1113+ )
1114+ business_farm_base = candidates ["bussefarminc" ].to_numpy (dtype = float )
1115+ pension_base = np .maximum (
1116+ 0.0 ,
1117+ candidates ["ssretinc" ].to_numpy (dtype = float ),
1118+ )
1119+ donor_agi_base = (
1120+ employment_base
1121+ + capital_gains_base
1122+ + interest_dividend_base
1123+ + business_farm_base
1124+ + 0.5 * pension_base
1125+ )
1126+ donor_abs_income_base = (
1127+ np .abs (employment_base )
1128+ + np .abs (capital_gains_base )
1129+ + np .abs (interest_dividend_base )
1130+ + np .abs (business_farm_base )
1131+ + 0.5 * np .abs (pension_base )
1132+ )
1133+ scale_base = np .where (
1134+ donor_agi_base > 1.0 ,
1135+ donor_agi_base ,
1136+ np .maximum (donor_abs_income_base , 1.0 ),
1137+ )
1138+ scale = receiver_agi / scale_base
10741139 employment_income = np .maximum (
10751140 0.0 ,
1076- networth * candidates [ "wageinc_ratio" ]. to_numpy ( dtype = float ) ,
1141+ employment_base * scale ,
10771142 )
1078- capital_gains = networth * candidates [ "kginc_ratio" ]. to_numpy ( dtype = float )
1143+ capital_gains = capital_gains_base * scale
10791144 interest_dividend_income = np .maximum (
10801145 0.0 ,
1081- networth * candidates ["intdivinc_ratio" ].to_numpy (dtype = float ),
1082- )
1083- business_farm_income = networth * candidates ["bussefarminc_ratio" ].to_numpy (
1084- dtype = float
1146+ interest_dividend_base * scale ,
10851147 )
1148+ business_farm_income = business_farm_base * scale
10861149 pension_income = np .maximum (
10871150 0.0 ,
1088- networth * candidates [ "ssretinc_ratio" ]. to_numpy ( dtype = float ) ,
1151+ pension_base * scale ,
10891152 )
10901153 agi = np .maximum (
10911154 employment_income
@@ -1105,6 +1168,52 @@ def scf_implied_component_values(
11051168 }
11061169
11071170
1171+ def scf_wealth_ratio_agi_values (
1172+ candidates : pd .DataFrame ,
1173+ receiver ,
1174+ ) -> np .ndarray :
1175+ """Return wealth-ratio AGI values for selection only, not amount priors."""
1176+
1177+ networth = float (getattr (receiver , "networth_dollars" , 0.0 ))
1178+ employment_income = np .maximum (
1179+ 0.0 ,
1180+ networth * candidates ["wageinc_ratio" ].to_numpy (dtype = float ),
1181+ )
1182+ capital_gains = networth * candidates ["kginc_ratio" ].to_numpy (dtype = float )
1183+ interest_dividend_income = np .maximum (
1184+ 0.0 ,
1185+ networth * candidates ["intdivinc_ratio" ].to_numpy (dtype = float ),
1186+ )
1187+ business_farm_income = networth * candidates ["bussefarminc_ratio" ].to_numpy (
1188+ dtype = float
1189+ )
1190+ pension_income = np .maximum (
1191+ 0.0 ,
1192+ networth * candidates ["ssretinc_ratio" ].to_numpy (dtype = float ),
1193+ )
1194+ return np .maximum (
1195+ employment_income
1196+ + capital_gains
1197+ + interest_dividend_income
1198+ + business_farm_income
1199+ + 0.5 * pension_income ,
1200+ 0.0 ,
1201+ )
1202+
1203+
1204+ def _receiver_estimated_agi (receiver ) -> float :
1205+ """Return the Forbes receiver AGI anchor used for SCF composition draws."""
1206+
1207+ estimated_agi = float (getattr (receiver , "estimated_agi" , np .nan ))
1208+ if np .isfinite (estimated_agi ) and estimated_agi > 0 :
1209+ return estimated_agi
1210+
1211+ networth = float (getattr (receiver , "networth_dollars" , 0.0 ))
1212+ agi_ratio = float (getattr (receiver , "agi_ratio" , DEFAULT_PROFILE .agi_ratio ))
1213+ self_made_scale = 1.05 if bool (getattr (receiver , "self_made_flag" , False )) else 0.95
1214+ return max (networth * agi_ratio * self_made_scale , 1.0 )
1215+
1216+
11081217def scf_implied_agi_values (
11091218 candidates : pd .DataFrame ,
11101219 receiver ,
@@ -1530,7 +1639,7 @@ def _build_calibration_diagnostics(
15301639) -> pd .DataFrame :
15311640 rows = []
15321641 for column in amount_columns :
1533- target_total = pop_weight * float (row .get (column , 0.0 ))
1642+ target_total = pop_weight * utils . _finite_amount (row .get (column , 0.0 ))
15341643 synthetic_total = _weighted_columns_total (synthetic , (column ,), weights )
15351644 absolute_error = synthetic_total - target_total
15361645 if abs (target_total ) > ARTIFACT_NUMERIC_TOL :
@@ -1577,7 +1686,7 @@ def _build_composition_diagnostics(
15771686 continue
15781687
15791688 target_total = pop_weight * sum (
1580- float (row .get (column , 0.0 )) for column in columns
1689+ utils . _finite_amount (row .get (column , 0.0 )) for column in columns
15811690 )
15821691 synthetic_total = _weighted_columns_total (
15831692 artifact .synthetic ,
0 commit comments