@@ -470,6 +470,15 @@ def _period_column(name: str, base_year: int) -> str:
470470 return f"{ name } __{ base_year } "
471471
472472
def _row_values(series: object) -> np.ndarray:
    """Extract the per-row data of a MicroSeries-like object as a NumPy array.

    The lookup order is fixed: an ``array`` attribute wins, then a ``values``
    attribute, and anything exposing neither is handed to ``np.asarray``
    directly. Only the raw row values are returned; any weights carried by
    the input object are not applied or consulted here.

    Args:
        series: A MicroSeries-like object, or any array-like input.

    Returns:
        The result of ``np.asarray`` on the first matching source.
    """
    # Attribute priority matters: objects exposing both must yield `.array`.
    for attribute in ("array", "values"):
        if hasattr(series, attribute):
            return np.asarray(getattr(series, attribute))
    return np.asarray(series)
480+
481+
473482def classify_archetype (
474483 * ,
475484 head_age : float ,
@@ -520,37 +529,49 @@ def build_tax_unit_summary(
520529 reform : object | None = None ,
521530) -> pd .DataFrame :
522531 sim = Microsimulation (dataset = dataset , reform = reform )
523- input_df = sim .to_input_dataframe ()
532+ # policyengine-core derives all microsimulation weights from household_weight.
533+ # Build the person-row donor summary from that source of truth.
534+ household_weight_at_person_level = _row_values (
535+ sim .calculate ("household_weight" , period = period , map_to = "person" )
536+ ).astype (float )
524537
525538 person_df = pd .DataFrame (
526539 {
527- "tax_unit_id" : sim .calculate ("person_tax_unit_id" , period = period ).values ,
528- "household_id" : sim .calculate ("person_household_id" , period = period ).values ,
529- "age" : sim .calculate ("age" , period = period ).values ,
530- "is_head" : sim .calculate ("is_tax_unit_head" , period = period ).values ,
531- "is_spouse" : sim .calculate ("is_tax_unit_spouse" , period = period ).values ,
532- "is_dependent" : sim .calculate (
533- "is_tax_unit_dependent" , period = period
534- ).values ,
535- "social_security" : sim .calculate ("social_security" , period = period ).values ,
540+ "tax_unit_id" : _row_values (
541+ sim .calculate ("person_tax_unit_id" , period = period )
542+ ),
543+ "household_id" : _row_values (
544+ sim .calculate ("person_household_id" , period = period )
545+ ),
546+ "age" : _row_values (sim .calculate ("age" , period = period )),
547+ "is_head" : _row_values (sim .calculate ("is_tax_unit_head" , period = period )),
548+ "is_spouse" : _row_values (
549+ sim .calculate ("is_tax_unit_spouse" , period = period )
550+ ),
551+ "is_dependent" : _row_values (
552+ sim .calculate ("is_tax_unit_dependent" , period = period )
553+ ),
554+ "social_security" : _row_values (
555+ sim .calculate ("social_security" , period = period )
556+ ),
536557 "payroll" : (
537- sim .calculate (
538- "taxable_earnings_for_social_security" , period = period
539- ).values
540- + sim .calculate (
541- "social_security_taxable_self_employment_income" , period = period
542- ).values
558+ _row_values (
559+ sim .calculate ("taxable_earnings_for_social_security" , period = period )
560+ )
561+ + _row_values (
562+ sim .calculate (
563+ "social_security_taxable_self_employment_income" ,
564+ period = period ,
565+ )
566+ )
567+ ),
568+ "dividend_income" : _row_values (
569+ sim .calculate ("qualified_dividend_income" , period = period )
543570 ),
544- "dividend_income" : sim .calculate (
545- "qualified_dividend_income" , period = period
546- ).values ,
547- "pension_income" : sim .calculate (
548- "taxable_pension_income" , period = period
549- ).values ,
550- "person_weight" : input_df [f"person_weight__{ period } " ].astype (float ).values ,
551- "household_weight" : input_df [f"household_weight__{ period } " ]
552- .astype (float )
553- .values ,
571+ "pension_income" : _row_values (
572+ sim .calculate ("taxable_pension_income" , period = period )
573+ ),
574+ "household_weight" : household_weight_at_person_level ,
554575 }
555576 )
556577
@@ -594,7 +615,6 @@ def build_tax_unit_summary(
594615 "dividend_income" : float (group ["dividend_income" ].sum ()),
595616 "pension_income" : float (group ["pension_income" ].sum ()),
596617 "support_count_weight" : 1.0 ,
597- "person_weight_proxy" : float (group ["person_weight" ].max ()),
598618 "household_weight_proxy" : float (group ["household_weight" ].max ()),
599619 }
600620 row ["archetype" ] = classify_archetype (
@@ -646,11 +666,11 @@ def attach_person_uprating_factors(
646666 else np .zeros (len (df ), dtype = float )
647667 )
648668 uprated_payroll = sum (
649- sim .calculate (component , period = target_year ). values .astype (float )
669+ _row_values ( sim .calculate (component , period = target_year )) .astype (float )
650670 for component in PAYROLL_COMPONENTS
651671 )
652672 uprated_ss = sum (
653- sim .calculate (component , period = target_year ). values .astype (float )
673+ _row_values ( sim .calculate (component , period = target_year )) .astype (float )
654674 for component in SS_COMPONENTS
655675 )
656676 df [PAYROLL_UPRATING_FACTOR_COLUMN ] = np .where (
@@ -666,32 +686,80 @@ def attach_person_uprating_factors(
666686 return df
667687
668688
def _person_level_values(
    sim: Microsimulation,
    variable: str,
    *,
    period: int,
) -> np.ndarray:
    """Return ``variable`` for ``period`` as a plain array, preferring person rows.

    The simulation is first asked for the variable with ``map_to="person"``.
    If that call raises, the variable is calculated again without any
    mapping. Whichever result is obtained is converted with ``_row_values``.

    Args:
        sim: Simulation whose ``calculate`` method is queried.
        variable: Name of the variable to calculate.
        period: Period passed through to ``calculate``.

    Returns:
        The row values of the calculated series as a NumPy array.
    """
    request = {"period": period}
    try:
        result = sim.calculate(variable, **request, map_to="person")
    except Exception:
        # NOTE(review): broad best-effort fallback kept as written; which
        # exception an unsupported mapping raises is not visible from here.
        result = sim.calculate(variable, **request)
    return _row_values(result)
700+
701+
def ensure_person_level_core_inputs(
    input_df: pd.DataFrame,
    sim: Microsimulation,
    *,
    base_year: int,
) -> pd.DataFrame:
    """Backfill person-row columns that newer policyengine-core exports omit.

    policyengine-core#497 made household_weight the source of truth for all
    microsimulation weights and stopped relying on redundant stored person
    weights. The support augmentation code still needs person-row IDs so it
    can clone donors and assign fresh entity identifiers.

    Works on a copy of ``input_df``. For the person ID, every entity ID column
    listed in ``ENTITY_ID_COLUMNS``, and ``household_weight``, the
    period-suffixed column is added from the simulation when it is missing.
    The period-suffixed ``person_weight`` column is dropped if present.

    Args:
        input_df: Input export with one row per person; not modified.
        sim: Simulation used to compute any missing columns.
        base_year: Year used both as calculation period and column suffix.

    Returns:
        A new DataFrame with the required columns present and no
        ``person_weight`` column for ``base_year``.

    Raises:
        ValueError: If a computed variable does not have one value per row
            of ``input_df``.
    """
    df = input_df.copy()
    person_row_count = len(df)

    entity_id_variables = [
        column for columns in ENTITY_ID_COLUMNS.values() for column in columns
    ]
    for variable in [PERSON_ID_COLUMN, *entity_id_variables, "household_weight"]:
        column = _period_column(variable, base_year)
        if column not in df.columns:
            values = _person_level_values(sim, variable, period=base_year)
            # A length mismatch means the values did not land on person rows.
            if len(values) != person_row_count:
                raise ValueError(
                    f"Expected {variable} to map to {person_row_count} person rows; "
                    f"got {len(values)}."
                )
            df[column] = values

    # `df` is already a private copy, so returning the dropped frame is
    # equivalent to dropping in place and returning `df`.
    stale_weight_column = _period_column("person_weight", base_year)
    return df.drop(columns=[stale_weight_column], errors="ignore")
739+
740+
def load_base_aggregates(
    base_dataset: str,
    *,
    reform: object | None = None,
) -> dict[str, float]:
    """Compute base-year household-level totals for Social Security and payroll.

    A simulation is built from ``base_dataset`` (optionally with ``reform``)
    and each variable is calculated for ``BASE_YEAR`` mapped to households.
    Totals come from the returned series' own ``.sum()``.

    NOTE(review): the result keys are named "weighted"; this presumably relies
    on the series returned by ``calculate`` applying household weights in
    ``.sum()`` -- confirm against the simulation's series type.

    Args:
        base_dataset: Dataset identifier passed to ``Microsimulation``.
        reform: Optional reform passed to ``Microsimulation``.

    Returns:
        A dict with ``weighted_ss_total`` and ``weighted_payroll_total``,
        the latter being taxable wages plus taxable self-employment income.
    """
    sim = Microsimulation(dataset=base_dataset, reform=reform)

    def _household_total(variable: str):
        # Sum of one variable at household level for the base year.
        return sim.calculate(variable, period=BASE_YEAR, map_to="household").sum()

    ss_total = _household_total("social_security")
    wages_total = _household_total("taxable_earnings_for_social_security")
    self_employment_total = _household_total(
        "social_security_taxable_self_employment_income"
    )
    return {
        "weighted_ss_total": float(ss_total),
        "weighted_payroll_total": float(wages_total + self_employment_total),
    }
696764
697765
@@ -1961,7 +2029,6 @@ def _clone_tax_unit_rows_to_target(
19612029) -> tuple [pd .DataFrame , dict [str , int ]] | tuple [None , dict [str , int ]]:
19622030 age_col = _period_column ("age" , base_year )
19632031 household_weight_col = _period_column ("household_weight" , base_year )
1964- person_weight_col = _period_column ("person_weight" , base_year )
19652032 person_id_col = _period_column (PERSON_ID_COLUMN , base_year )
19662033
19672034 adults = donor_rows [donor_rows [age_col ] >= 18 ].sort_values (age_col , ascending = False )
@@ -1975,7 +2042,10 @@ def _clone_tax_unit_rows_to_target(
19752042 ):
19762043 return None , id_counters
19772044
1978- cloned = donor_rows .copy ()
2045+ cloned = donor_rows .drop (
2046+ columns = [_period_column ("person_weight" , base_year )],
2047+ errors = "ignore" ,
2048+ ).copy ()
19792049 household_id = id_counters ["household" ]
19802050 id_counters ["household" ] += 1
19812051 for entity_name , columns in ENTITY_ID_COLUMNS .items ():
@@ -2003,13 +2073,6 @@ def _clone_tax_unit_rows_to_target(
20032073 * clone_weight_scale
20042074 / max (clone_weight_divisor , 1 )
20052075 )
2006- if person_weight_col in cloned .columns :
2007- cloned [person_weight_col ] = (
2008- cloned [person_weight_col ].astype (float )
2009- * clone_weight_scale
2010- / max (clone_weight_divisor , 1 )
2011- )
2012-
20132076 adult_indices = adults .index .tolist ()
20142077 head_idx = adult_indices [0 ]
20152078 spouse_idx = adult_indices [1 ] if target_has_spouse else None
@@ -2115,7 +2178,6 @@ def _compose_role_donor_rows_to_target(
21152178) -> tuple [pd .DataFrame , dict [str , int ]] | tuple [None , dict [str , int ]]:
21162179 age_col = _period_column ("age" , base_year )
21172180 household_weight_col = _period_column ("household_weight" , base_year )
2118- person_weight_col = _period_column ("person_weight" , base_year )
21192181 person_id_col = _period_column (PERSON_ID_COLUMN , base_year )
21202182
21212183 def _adult_rows (df : pd .DataFrame | None ) -> pd .DataFrame :
@@ -2248,6 +2310,11 @@ def _dependent_rows(df: pd.DataFrame | None) -> pd.DataFrame:
22482310 # Reset duplicate donor indices so later row-specific retargeting only touches
22492311 # the intended clone row.
22502312 cloned = pd .DataFrame (selected_rows ).reset_index (drop = True ).copy ()
2313+ cloned .drop (
2314+ columns = [_period_column ("person_weight" , base_year )],
2315+ inplace = True ,
2316+ errors = "ignore" ,
2317+ )
22512318 cloned_sources = pd .Series (selected_sources , index = cloned .index )
22522319 household_id = id_counters ["household" ]
22532320 id_counters ["household" ] += 1
@@ -2276,13 +2343,6 @@ def _dependent_rows(df: pd.DataFrame | None) -> pd.DataFrame:
22762343 * clone_weight_scale
22772344 / max (clone_weight_divisor , 1 )
22782345 )
2279- if person_weight_col in cloned .columns :
2280- cloned [person_weight_col ] = (
2281- cloned [person_weight_col ].astype (float )
2282- * clone_weight_scale
2283- / max (clone_weight_divisor , 1 )
2284- )
2285-
22862346 head_idx = cloned .index [0 ]
22872347 spouse_idx = cloned .index [1 ] if target_candidate .spouse_age is not None else None
22882348 dependent_indices = (
@@ -2401,6 +2461,11 @@ def build_donor_backed_augmented_input_dataframe(
24012461 base_year = base_year ,
24022462 target_year = target_year ,
24032463 )
2464+ input_df = ensure_person_level_core_inputs (
2465+ input_df ,
2466+ sim ,
2467+ base_year = base_year ,
2468+ )
24042469 actual_summary = build_actual_tax_unit_summary (base_dataset , reform = reform )
24052470 base_aggregates = load_base_aggregates (base_dataset , reform = reform )
24062471 ss_scale = load_ssa_benefit_projections (target_year ) / max (
@@ -2552,6 +2617,11 @@ def build_role_composite_augmented_input_dataframe(
25522617 base_year = base_year ,
25532618 target_year = target_year ,
25542619 )
2620+ input_df = ensure_person_level_core_inputs (
2621+ input_df ,
2622+ sim ,
2623+ base_year = base_year ,
2624+ )
25552625 actual_summary = build_actual_tax_unit_summary (base_dataset , reform = reform )
25562626 base_aggregates = load_base_aggregates (base_dataset , reform = reform )
25572627 ss_scale = load_ssa_benefit_projections (target_year ) / max (
0 commit comments