Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1149.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Constrain ECPS calibration to the source household count so PUF clone reweighting cannot inflate total household weight.
6 changes: 6 additions & 0 deletions policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
from policyengine_us_data.utils import (
ABSOLUTE_ERROR_SCALE_TARGETS,
HOUSEHOLD_COUNT_TARGET,
build_loss_matrix,
get_target_error_normalisation,
get_target_loss_weights,
Expand Down Expand Up @@ -669,6 +670,11 @@ def generate(self):
del loss_matrix, targets_array
gc.collect()
assert loss_matrix_clean.shape[1] == targets_array_clean.size
if HOUSEHOLD_COUNT_TARGET not in loss_matrix_clean.columns:
raise ValueError(
f"{HOUSEHOLD_COUNT_TARGET} missing from EnhancedCPS "
"calibration targets"
)

loss_matrix_clean = loss_matrix_clean.astype(np.float32)

Expand Down
1 change: 1 addition & 0 deletions policyengine_us_data/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
__all__ = [
"ABSOLUTE_ERROR_SCALE_TARGETS",
"HardConcrete",
"HOUSEHOLD_COUNT_TARGET",
"build_loss_matrix",
"get_target_error_normalisation",
"get_target_loss_weights",
Expand Down
34 changes: 34 additions & 0 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@

# Loss weights that get_target_loss_weights assigns to the BEA NIPA
# direct-sum targets and the BEA wages-and-salaries targets respectively.
BEA_NIPA_DIRECT_SUM_LOSS_WEIGHT = 1_000.0
BEA_WAGES_AND_SALARIES_LOSS_WEIGHT = 1_000.0
# Column name under which _add_household_count_target stores the
# household-count target in the loss matrix.
HOUSEHOLD_COUNT_TARGET = "nation/source/household_count"
# Loss weight that get_target_loss_weights assigns to the target named
# HOUSEHOLD_COUNT_TARGET.
HOUSEHOLD_COUNT_LOSS_WEIGHT = 1_000.0

CBO_INCOME_BY_SOURCE_TARGETS = [
("irs_employment_income", "employment_income"),
Expand Down Expand Up @@ -1199,6 +1201,31 @@ def _add_transfer_balance_targets(loss_matrix, targets_list, sim, time_period):
return targets_list, loss_matrix


def _add_household_count_target(loss_matrix, targets_list, sim):
    """Constrain total household weight to the source survey total.

    Adds a column named ``HOUSEHOLD_COUNT_TARGET`` filled with ones to
    ``loss_matrix`` and appends the sum of the simulation's
    ``household_weight`` values to ``targets_list``. Both arguments are
    modified in place and are also returned.

    Args:
        loss_matrix: Table that must have exactly as many rows as there
            are household weights; it receives the new column.
        targets_list: List of target values; the weight total is appended.
        sim: Object whose ``calculate("household_weight")`` result exposes
            the weights through a ``.values`` attribute.

    Returns:
        The tuple ``(targets_list, loss_matrix)``.

    Raises:
        ValueError: If the row count of ``loss_matrix`` differs from the
            number of household weights, or if the weight total is not a
            finite, strictly positive number.
    """

    household_weights = sim.calculate("household_weight").values
    if len(loss_matrix) != len(household_weights):
        raise ValueError(
            "Household count target length mismatch: "
            f"loss matrix has {len(loss_matrix)} rows but household_weight has "
            f"{len(household_weights)} values"
        )

    # Accumulate in float64: np.sum otherwise accumulates in the input
    # dtype, so a float32 weight array could round a total in the hundreds
    # of millions by several units before it is stored as the target.
    target = float(np.sum(household_weights, dtype=np.float64))
    if not np.isfinite(target) or target <= 0:
        raise ValueError(
            "Household count target must have positive finite source weight total"
        )

    # A column of ones: presumably each household's calibrated weight then
    # contributes one-for-one to this target's estimate — confirm against
    # how the loss matrix is multiplied by the weights downstream.
    loss_matrix[HOUSEHOLD_COUNT_TARGET] = np.ones(
        len(household_weights),
        dtype=np.float32,
    )
    targets_list.append(target)
    return targets_list, loss_matrix


def get_target_error_normalisation(target_names, targets_array):
"""Return numerator shifts and denominators for target loss scaling."""
target_names = np.asarray(target_names)
Expand Down Expand Up @@ -1227,6 +1254,7 @@ def get_target_loss_weights(target_names):
) | np.char.startswith(target_names, "state/bea/wages_and_salaries/")
weights[is_bea_direct_sum_target] = BEA_NIPA_DIRECT_SUM_LOSS_WEIGHT
weights[is_bea_wage_target] = BEA_WAGES_AND_SALARIES_LOSS_WEIGHT
weights[target_names == HOUSEHOLD_COUNT_TARGET] = HOUSEHOLD_COUNT_LOSS_WEIGHT
return weights


Expand Down Expand Up @@ -1360,6 +1388,12 @@ def build_loss_matrix(dataset: type, time_period):
hh_id = sim.calculate("household_id").values
loss_matrix = loss_matrix.loc[hh_id]

targets_array, loss_matrix = _add_household_count_target(
loss_matrix,
targets_array,
sim,
)

# Census single-year age population projections

populations = pd.read_csv(CALIBRATION_FOLDER / "np2023_d5_mid.csv")
Expand Down
2 changes: 2 additions & 0 deletions policyengine_us_data/utils/national_target_parity.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,8 @@ def _legacy_reason(target_name: str) -> str:
return "legacy_cms_aca_spending_target_not_in_target_db"
if target_name.startswith("nation/accounting/"):
return "legacy_accounting_balance_target_not_in_target_db"
if target_name == "nation/source/household_count":
return "legacy_source_household_count_target_not_in_target_db"
if target_name.startswith("nation/irs/negative_household_market_income_"):
return "legacy_negative_market_income_target_not_in_target_db"
if target_name == "nation/census/infants":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: 3.14",
]
dependencies = [
"policyengine-us==1.709.1",
"policyengine-us==1.711.0",
# policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for
# PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost
# after _invalidate_all_caches) and is required by policyengine-us 1.682.1+.
Expand Down
52 changes: 52 additions & 0 deletions tests/unit/calibration/test_loss_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@
BEA_WAGES_AND_SALARIES_LOSS_WEIGHT,
BLS_CE_TOTALS,
HARD_CODED_TOTALS,
HOUSEHOLD_COUNT_LOSS_WEIGHT,
HOUSEHOLD_COUNT_TARGET,
LOW_AGI_INVESTMENT_INCOME_SOI_VARIABLES,
SOI_NEGATIVE_AGI_TARGETED_VARIABLES,
TRANSFER_BALANCE_TARGETS,
_add_bea_state_wage_targets,
_add_agi_metric_columns,
_add_acs_housing_cost_targets,
_add_household_count_target,
_add_aotc_targets,
_add_bls_ce_targets,
_add_ctc_targets,
Expand Down Expand Up @@ -167,6 +170,22 @@ def test_bea_nipa_direct_sum_targets_get_higher_loss_weight():
]


def test_household_count_target_gets_higher_loss_weight():
    """The household-count target gets its dedicated loss weight.

    A plain population-by-age target listed alongside it is expected to
    keep a weight of 1.0.
    """
    names = np.array(
        [HOUSEHOLD_COUNT_TARGET, "nation/census/population_by_age/0"]
    )
    expected = [HOUSEHOLD_COUNT_LOSS_WEIGHT, 1.0]

    observed = get_target_loss_weights(names).tolist()

    assert observed == expected


def test_aca_targets_roll_forward_to_2025():
targets, data_year = _load_aca_spending_and_enrollment_targets(2025)

Expand Down Expand Up @@ -243,6 +262,17 @@ def __init__(self, values):
self.values = np.asarray(values)


class _FakeHouseholdWeightSimulation:
    """Simulation stand-in that only answers ``household_weight`` lookups."""

    def __init__(self, weights):
        # Stored as given; wrapped in a fake result object on each lookup.
        self.weights = weights

    def calculate(self, variable, map_to=None, period=None):
        """Return the stored weights wrapped in ``_FakeArrayResult``.

        Asserts that the caller requested ``household_weight`` and left
        both ``map_to`` and ``period`` unset.
        """
        assert variable == "household_weight"
        for optional_argument in (map_to, period):
            assert optional_argument is None
        return _FakeArrayResult(self.weights)


class _FakeSimulation:
def __init__(self):
self.calculate_calls = []
Expand Down Expand Up @@ -427,6 +457,28 @@ def test_state_agi_targets_are_limited_to_filers(tmp_path, monkeypatch):
)


def test_add_household_count_target_uses_source_weight_total():
    """The appended target equals the weight sum; the column is all ones."""
    source_weights = [80.0, 20.0, 0.0, 0.0]
    empty_matrix = pd.DataFrame(index=[101, 102, 103, 104])
    fake_sim = _FakeHouseholdWeightSimulation(source_weights)

    targets, loss_matrix = _add_household_count_target(
        empty_matrix, [], fake_sim
    )

    # 80 + 20 + 0 + 0: zero-weight rows still count as rows in the matrix.
    assert targets == [100.0]
    indicator_column = loss_matrix[HOUSEHOLD_COUNT_TARGET].to_numpy()
    np.testing.assert_array_equal(
        indicator_column,
        np.ones(4, dtype=np.float32),
    )


def test_build_loss_matrix_adds_household_count_target_before_reweighting():
    """``build_loss_matrix`` must reference the household-count helper.

    NOTE(review): this is a textual check on the function's source only;
    it does not verify call order relative to any reweighting step.
    """
    assert "_add_household_count_target" in inspect.getsource(build_loss_matrix)


def test_add_ssi_recipient_targets_adds_total_and_age_counts():
targets, loss_matrix = _add_ssi_recipient_targets(
pd.DataFrame(),
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/datasets/test_enhanced_cps_seeding.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,17 @@ def test_enhanced_cps_sources_use_deterministic_weight_priors():

assert "np.random.normal" not in source
assert source.count("initialize_weight_priors(original_weights.values)") == 2


def test_initialize_weight_priors_preserves_source_weight_total():
    """Priors keep the source weight total while seeding zero-weight rows."""
    from policyengine_us_data.datasets.cps.enhanced_cps import (
        initialize_weight_priors,
    )

    source_weights = np.array([80.0, 20.0, 0.0, 0.0])
    expected_priors = np.array([40.0, 10.0, 25.0, 25.0])

    priors = initialize_weight_priors(
        source_weights,
        zero_weight_total_share=0.5,
    )

    # The total is unchanged at 100; per-row values match expected_priors.
    np.testing.assert_allclose(priors.sum(), 100.0)
    np.testing.assert_allclose(priors, expected_priors)
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.