Skip to content

Commit ad9d408

Browse files
MaxGhenis and claude authored
Fix double-weight application in sanity tests (#572)
* Fix double-weight application in sanity tests (#571)

  `sim.calculate("household_weight").sum()` returns the weighted sum of weights (the sum of weight²) because MicroSeries embeds weights. Use `.values.sum()` to get the raw sum of weights for household/person count checks.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Trigger CI

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 13140e5 commit ad9d408

2 files changed

Lines changed: 7 additions & 8 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed double-weight application in dataset sanity tests: use `.values.sum()` for household_weight checks to avoid MicroSeries applying weights twice.

policyengine_us_data/tests/test_datasets/test_dataset_sanity.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,17 @@ def test_ecps_self_employment_income_positive(ecps_sim):
4848

4949
def test_ecps_household_count(ecps_sim):
5050
"""Household count should be roughly 130-160M."""
51-
weights = ecps_sim.calculate("household_weight")
52-
total_hh = weights.sum()
51+
total_hh = ecps_sim.calculate("household_weight").values.sum()
5352
assert (
5453
100e6 < total_hh < 200e6
5554
), f"Total households = {total_hh:.2e}, expected 100M-200M."
5655

5756

5857
def test_ecps_person_count(ecps_sim):
5958
"""Weighted person count should be roughly 330M."""
60-
weights = ecps_sim.calculate("household_weight", map_to="person")
61-
total_people = weights.sum()
59+
total_people = ecps_sim.calculate(
60+
"household_weight", map_to="person"
61+
).values.sum()
6262
assert (
6363
250e6 < total_people < 400e6
6464
), f"Total people = {total_people:.2e}, expected 250M-400M."
@@ -100,8 +100,7 @@ def test_cps_employment_income_positive(cps_sim):
100100

101101

102102
def test_cps_household_count(cps_sim):
103-
weights = cps_sim.calculate("household_weight")
104-
total_hh = weights.sum()
103+
total_hh = cps_sim.calculate("household_weight").values.sum()
105104
assert 100e6 < total_hh < 200e6, f"CPS total households = {total_hh:.2e}."
106105

107106

@@ -129,8 +128,7 @@ def test_sparse_employment_income_positive(sparse_sim):
129128

130129

131130
def test_sparse_household_count(sparse_sim):
132-
weights = sparse_sim.calculate("household_weight")
133-
total_hh = weights.sum()
131+
total_hh = sparse_sim.calculate("household_weight").values.sum()
134132
assert (
135133
100e6 < total_hh < 200e6
136134
), f"Sparse total households = {total_hh:.2e}, expected 100M-200M."

0 commit comments

Comments
 (0)