diff --git a/changelog.d/1102.changed b/changelog.d/1102.changed new file mode 100644 index 000000000..c5b9f76a6 --- /dev/null +++ b/changelog.d/1102.changed @@ -0,0 +1,2 @@ +Drop computed retirement helper variables from the CPS export. +Bumped policyengine-us to 1.702.1. diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index ab7503bae..c5f7c2e95 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -447,6 +447,10 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame): TEMPORARY_TAKEUP_SOURCE_ANCHORS = ("snap_reported", "ssi_reported") +TEMPORARY_IMPUTATION_SOURCE_VARIABLES = ( + "pension_income", + "retirement_distributions", +) def _drop_persisted_dataset_variables(file_path, variable_names): @@ -2753,6 +2757,8 @@ def add_tips(self, cps: h5py.File): "is_household_head", "has_disability_income", "household_size", + "pension_income", + "retirement_distributions", "retirement_income", "non_ssi_income", ], @@ -2760,6 +2766,10 @@ def add_tips(self, cps: h5py.File): ) self.save_dataset(cps) self.save_dataset(household_vehicle_data) + _drop_persisted_dataset_variables( + self.file_path, + TEMPORARY_IMPUTATION_SOURCE_VARIABLES, + ) @pipeline_node( diff --git a/pyproject.toml b/pyproject.toml index a9224268a..907d463d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "policyengine-us==1.702.0", + "policyengine-us==1.702.1", # policyengine-core 3.26.1 is the current 3.26.x runtime and includes the fix for # PolicyEngine/policyengine-core#482 (user-set ETERNITY inputs lost # after _invalidate_all_caches) and is required by policyengine-us 1.682.1+. diff --git a/tests/integration/test_cps_generation.py b/tests/integration/test_cps_generation.py index 5107d4bd9..eb8a68796 100644 --- a/tests/integration/test_cps_generation.py +++ b/tests/integration/test_cps_generation.py @@ -137,7 +137,7 @@ def save_dataset(self, data): assert h5_file["takes_up_housing_assistance_if_eligible"][:].tolist() == [True] -def test_add_tips_derives_tipped_status_from_raw_cps(monkeypatch): +def test_add_tips_derives_tipped_status_from_raw_cps(monkeypatch, tmp_path): import policyengine_us_data.datasets.sipp as sipp_module from policyengine_us_data.datasets.cps.cps import add_tips @@ -168,6 +168,13 @@ def load(self): class FakeDataset: def __init__(self): self.raw_cps = FakeRawCPS() + self.file_path = tmp_path / "cps_2024.h5" + with h5py.File(self.file_path, "w") as h5_file: + h5_file.create_dataset("pension_income", data=np.array([1.0, 2.0])) + h5_file.create_dataset( + "retirement_distributions", + data=np.array([3.0, 4.0]), + ) self.saved_dataset = None self.base_dataset = { "person_id": [1, 2], @@ -178,6 +185,8 @@ def __init__(self): "qualified_dividend_income": [40.0, 0.0], "non_qualified_dividend_income": [10.0, 0.0], "rental_income": [0.0, 0.0], + "pension_income": [2_000.0, 0.0], + "retirement_distributions": [3_000.0, 4_000.0], "age": [30, 45], "household_weight": [1.0, 1.0], "is_female": [False, True], @@ -206,6 +215,7 @@ class FakeAssetModel: def predict(self, X_test, mean_quantile): assert X_test["interest_income"].tolist() == [125.0, 0.0] assert X_test["dividend_income"].tolist() == [50.0, 0.0] + assert X_test["retirement_income"].tolist() == [5_000.0, 4_000.0] return pd.DataFrame( { "bank_account_assets": [0.0, 0.0], @@ -268,6 +278,11 @@ def fake_predict_ssi_disability_criteria(model, receiver_df): True, False, ] + assert "pension_income" not in dataset.saved_dataset + assert "retirement_distributions" not in dataset.saved_dataset + with h5py.File(dataset.file_path, "r") as h5_file: + assert "pension_income" not in h5_file + assert "retirement_distributions" not in h5_file def test_add_rent_requests_person_level_frames(monkeypatch, tmp_path): diff --git a/uv.lock b/uv.lock index 67329f64c..81d80b028 100644 --- a/uv.lock +++ b/uv.lock @@ -2122,7 +2122,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.702.0" +version = "1.702.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2132,9 +2132,9 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/7e/d3095e6dde387cb56eb2dd0543cdc0b0f7670446d3b6ea45468165d60d1f/policyengine_us-1.702.0.tar.gz", hash = "sha256:689526d444c98681d517247d5308e795e02f24c65423295232ab347e61cac981", size = 9876039, upload-time = "2026-05-21T14:56:36.133Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4d/3e/6000ddb6cd51bb5d832089cf2904b88773e431e358fef4f4bd736d5aea0e/policyengine_us-1.702.1.tar.gz", hash = "sha256:b3782233a8e3d6c5eca48f329cad87e46319c170eacb64836f1966e58e5f95b6", size = 9884003, upload-time = "2026-05-21T16:42:13.543Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/1d/67cde50bf6401c5c3ab95ff8f4036876422fa6fc72481425f3f3c7eb3177/policyengine_us-1.702.0-py3-none-any.whl", hash = "sha256:83d787337760587dbfcfe6bc2ae59afb53d2baa5827cb535776ff7147561a72f", size = 10649615, upload-time = "2026-05-21T14:56:33.349Z" }, + { url = "https://files.pythonhosted.org/packages/79/d7/95bbe3549a5f932ff91a53f84a78a70497b2e11e395dfd1fdd1d76ba9a71/policyengine_us-1.702.1-py3-none-any.whl", hash = "sha256:f6029ae7319219f1e36c805f778dd8594742e89867b0c5fc07459b6ee18b487e", size = 10673068, upload-time = "2026-05-21T16:42:09.985Z" }, ] [[package]] @@ -2204,7 +2204,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.1" }, { name = "pip-system-certs", specifier = ">=3.0" }, { name = "policyengine-core", specifier = ">=3.26.1,<3.27" }, - { name = "policyengine-us", specifier = "==1.702.0" }, + { name = "policyengine-us", specifier = "==1.702.1" }, { name = "requests", specifier = ">=2.25.0" }, { name = "samplics", marker = "extra == 'calibration'" }, { name = "scipy", specifier = ">=1.15.3" },