Skip to content

Commit 082b259

Browse files
committed
Merge main, resolve uv.lock conflict
2 parents f7c3bb3 + c351735 commit 082b259

7 files changed

Lines changed: 130 additions & 23 deletions

File tree

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [1.54.1] - 2026-01-26 02:49:11
9+
10+
### Fixed
11+
12+
- Derive partnership_se_income from PUF source columns using Yale Budget Lab's gross-up approach instead of looking for non-existent k1bx14 columns.
13+
14+
## [1.54.0] - 2026-01-25 17:43:38
15+
16+
### Added
17+
18+
- partnership_se_income variable from Schedule K-1 Box 14 (k1bx14p + k1bx14s), representing partnership income subject to self-employment tax.
19+
20+
## [1.53.1] - 2026-01-25 15:48:00
21+
22+
### Changed
23+
24+
- Bumped policyengine-core minimum version to 3.23.5 for pandas 3.0 compatibility
25+
826
## [1.53.0] - 2026-01-23 20:51:58
927

1028
### Changed
@@ -815,6 +833,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
815833

816834

817835

836+
[1.54.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.54.0...1.54.1
837+
[1.54.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.53.1...1.54.0
838+
[1.53.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.53.0...1.53.1
818839
[1.53.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.52.0...1.53.0
819840
[1.52.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.51.1...1.52.0
820841
[1.51.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.51.0...1.51.1

changelog.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,3 +687,20 @@
687687
changed:
688688
- Added policyengine-claude plugin auto-install configuration.
689689
date: 2026-01-23 20:51:58
690+
- bump: patch
691+
changes:
692+
changed:
693+
- Bumped policyengine-core minimum version to 3.23.5 for pandas 3.0 compatibility
694+
date: 2026-01-25 15:48:00
695+
- bump: minor
696+
changes:
697+
added:
698+
- partnership_se_income variable from Schedule K-1 Box 14 (k1bx14p + k1bx14s),
699+
representing partnership income subject to self-employment tax.
700+
date: 2026-01-25 17:43:38
701+
- bump: patch
702+
changes:
703+
fixed:
704+
- Derive partnership_se_income from PUF source columns using Yale Budget Lab's
705+
gross-up approach instead of looking for non-existent k1bx14 columns.
706+
date: 2026-01-26 02:49:11

policyengine_us_data/datasets/cps/extended_cps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"miscellaneous_income",
5151
"alimony_expense",
5252
"farm_income",
53+
"partnership_se_income",
5354
"alimony_income",
5455
"health_savings_account_ald",
5556
"non_sch_d_capital_gains",

policyengine_us_data/datasets/puf/puf.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,33 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
381381
puf["unreported_payroll_tax"] = puf.E09800
382382
# Ignore f2441 (AMT form attached)
383383
# Ignore cmbtp (estimate of AMT income not in AGI)
384-
# Ignore k1bx14s and k1bx14p (partner self-employment income included in partnership and S-corp income)
384+
385+
# Partnership self-employment income from Schedule K-1 Box 14
386+
# This is the portion of partnership income subject to SE tax (general partners)
387+
# Derived from total SE income minus Schedule C and Schedule F income
388+
# Based on Yale Budget Lab's Tax-Data process_puf.R approach:
389+
# E30400 = taxpayer's TAXABLE SE income (already * 0.9235)
390+
# E30500 = spouse's TAXABLE SE income (already * 0.9235)
391+
# E00900 = Schedule C net profit/loss (gross)
392+
# E02100 = Schedule F farm income (gross)
393+
# Since E30400/E30500 are post-deduction (taxable), we gross them up
394+
# by dividing by 0.9235 before subtracting Sch C/F.
395+
# PolicyEngine applies the 0.9235 factor itself in taxable_self_employment_income.
396+
SE_DEDUCTION_FACTOR = 0.9235 # 1 - 0.5 * 0.153 (one minus half of the 15.3% SE tax rate)
397+
taxable_se = puf["E30400"].fillna(0) + puf["E30500"].fillna(0)
398+
gross_se = taxable_se / SE_DEDUCTION_FACTOR
399+
schedule_c_f_income = puf["E00900"].fillna(0) + puf["E02100"].fillna(0)
400+
# Only compute when there's partnership activity (net partnership income != 0)
401+
has_partnership = (
402+
puf["E25940"].fillna(0)
403+
+ puf["E25980"].fillna(0)
404+
- puf["E25920"].fillna(0)
405+
- puf["E25960"].fillna(0)
406+
) != 0
407+
partnership_se = np.where(
408+
has_partnership, gross_se - schedule_c_f_income, 0
409+
)
410+
puf["partnership_se_income"] = partnership_se
385411

386412
# --- Qualified Business Income Deduction (QBID) simulation ---
387413
w2, ubia = simulate_w2_and_ubia_from_puf(puf, seed=42)
@@ -491,6 +517,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
491517
"business_is_sstb",
492518
"deductible_mortgage_interest",
493519
"partnership_s_corp_income",
520+
"partnership_se_income",
494521
"qualified_reit_and_ptp_income",
495522
"qualified_bdc_income",
496523
]
@@ -544,6 +571,13 @@ def generate(self):
544571
for variable in system.variables
545572
}
546573

574+
# Filter FINANCIAL_SUBSET to only include variables defined in
575+
# policyengine-us. This allows us-data to be updated before or after
576+
# policyengine-us without breaking.
577+
self.available_financial_vars = [
578+
v for v in FINANCIAL_SUBSET if v in self.variable_to_entity
579+
]
580+
547581
VARIABLES = [
548582
"person_id",
549583
"tax_unit_id",
@@ -563,7 +597,7 @@ def generate(self):
563597
"is_tax_unit_head",
564598
"is_tax_unit_spouse",
565599
"is_tax_unit_dependent",
566-
] + FINANCIAL_SUBSET
600+
] + self.available_financial_vars
567601

568602
self.holder = {variable: [] for variable in VARIABLES}
569603

@@ -607,7 +641,7 @@ def generate(self):
607641
def add_tax_unit(self, row, tax_unit_id):
608642
self.holder["tax_unit_id"].append(tax_unit_id)
609643

610-
for key in FINANCIAL_SUBSET:
644+
for key in self.available_financial_vars:
611645
if self.variable_to_entity[key] == "tax_unit":
612646
self.holder[key].append(row[key])
613647

@@ -649,7 +683,7 @@ def add_filer(self, row, tax_unit_id):
649683
row["interest_deduction"]
650684
)
651685

652-
for key in FINANCIAL_SUBSET:
686+
for key in self.available_financial_vars:
653687
if key == "deductible_mortgage_interest":
654688
# Skip this one: it is added artificially at the filer level.
655689
continue
@@ -682,7 +716,7 @@ def add_spouse(self, row, tax_unit_id):
682716

683717
self.holder["deductible_mortgage_interest"].append(0)
684718

685-
for key in FINANCIAL_SUBSET:
719+
for key in self.available_financial_vars:
686720
if key == "deductible_mortgage_interest":
687721
# Skip this one: it is added artificially at the filer level.
688722
continue
@@ -706,7 +740,7 @@ def add_dependent(self, row, tax_unit_id, dependent_id):
706740

707741
self.holder["deductible_mortgage_interest"].append(0)
708742

709-
for key in FINANCIAL_SUBSET:
743+
for key in self.available_financial_vars:
710744
if key == "deductible_mortgage_interest":
711745
# Skip this one: it is added artificially at the filer level.
712746
continue
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Test pandas 3.0 compatibility.
2+
3+
This test verifies that policyengine-core 3.23.5+ correctly handles
4+
pandas Series with StringDtype index when encoding enums.
5+
"""
6+
7+
import numpy as np
8+
import pandas as pd
9+
import pytest
10+
11+
from policyengine_core.enums import Enum
12+
13+
14+
class SampleEnum(Enum):
15+
VALUE_A = "value_a"
16+
VALUE_B = "value_b"
17+
18+
19+
def test_enum_encode_with_pandas_series():
20+
"""Test that Enum.encode works with pandas Series containing enum items.
21+
22+
In pandas 3.0, Series with StringDtype use label-based indexing by default.
23+
This test verifies the fix in policyengine-core 3.23.5 that uses .iloc[0]
24+
for positional access.
25+
"""
26+
enum_items = [SampleEnum.VALUE_A, SampleEnum.VALUE_B, SampleEnum.VALUE_A]
27+
series = pd.Series(enum_items)
28+
29+
# This would fail with KeyError: 0 before the fix
30+
encoded = SampleEnum.encode(series)
31+
32+
assert len(encoded) == 3
33+
assert list(encoded) == [0, 1, 0]

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "policyengine_us_data"
11-
version = "1.53.0"
11+
version = "1.54.1"
1212
description = "A package to create representative microdata for the US."
1313
readme = "README.md"
1414
authors = [
@@ -21,12 +21,12 @@ classifiers = [
2121
"Programming Language :: Python :: 3.13",
2222
]
2323
dependencies = [
24-
"policyengine-us>=1.353.0",
25-
"policyengine-core>=3.19.0",
24+
"policyengine-us>=1.516.0",
25+
"policyengine-core>=3.23.6",
2626
"pandas>=2.3.1",
2727
"requests>=2.25.0",
2828
"tqdm>=4.60.0",
29-
"microdf_python>=1.0.0",
29+
"microdf_python>=1.2.1",
3030
"setuptools>=60",
3131
"microimpute>=1.1.4",
3232
"pip-system-certs>=3.0",

uv.lock

Lines changed: 14 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)