Skip to content

Commit 71253ab

Browse files
authored
Enforce NIPA employment target in calibration (#1020)
1 parent 1cd9e2b commit 71253ab

11 files changed

Lines changed: 258 additions & 43 deletions

File tree

AGENTS.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,25 @@ artifacts, or public library functions, read
2020
When diagnosing a deployed Modal pipeline run or a failed publication pipeline,
2121
read `docs/engineering/skills/pipeline_operations.md`.
2222

23+
When adding, changing, or reviewing calibration target definitions, read
24+
`docs/engineering/skills/calibration_targets.md`.
25+
26+
## Calibration targets
27+
28+
Manually sourced national or local-file calibration targets must be registered
29+
in every active target path before merging:
30+
31+
1. `policyengine_us_data/utils/loss.py` for the ECPS loss matrix.
32+
2. `policyengine_us_data/db/etl_national_targets.py` for `policy_data.db` and
33+
local H5 validation inputs.
34+
3. `policyengine_us_data/calibration/target_config.yaml` when the default
35+
calibration uses an `include:` list; otherwise the target can exist in
36+
`policy_data.db` but still be omitted from calibration.
37+
38+
Do not treat a target appearing in `policy_data.db` as proof that published
39+
datasets were calibrated to it. Add or update tests that fail if a new target is
40+
present in one path but missing from another.
41+
2342
## GitHub PRs
2443

2544
Read `docs/engineering/skills/github-prs.md` before opening, replacing, or

changelog.d/1019.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure BEA NIPA direct-sum targets are selected by the default calibration, and fail publication validation when the enhanced CPS employment income total deviates from the BEA NIPA wages-and-salaries target by more than 10%.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Calibration Target Changes
2+
3+
Use this workflow when adding, changing, or reviewing manually sourced
4+
calibration targets.
5+
6+
## Dual Registration
7+
8+
New targets must be registered in both active target systems:
9+
10+
- `policyengine_us_data/utils/loss.py` for the ECPS `build_loss_matrix()` path.
11+
- `policyengine_us_data/db/etl_national_targets.py` for `policy_data.db`, local
12+
H5 outputs, and validation inputs.
13+
14+
If the default calibration path uses `policyengine_us_data/calibration/target_config.yaml`
15+
with an `include:` list, also add the matching include rule there. A target can
16+
exist in `policy_data.db` and still be ignored by calibration if it is missing
17+
from `target_config.yaml`.
18+
19+
## Tests
20+
21+
Every target change should add or update tests that prove the target is wired
22+
through every active path. For manually sourced national targets, cover:
23+
24+
- the ECPS loss matrix registration in `tests/unit/calibration/test_loss_targets.py`;
25+
- the DB ETL row in `tests/unit/test_etl_national_targets.py`;
26+
- the default calibration include rule in
27+
`tests/unit/calibration/test_target_config.py`;
28+
- any publication guard in `tests/unit/test_upload_completed_datasets.py` when
29+
a missing target would make a released dataset materially wrong.
30+
31+
Do not use a successful DB ETL test as a substitute for a calibration-selection
32+
test.

policyengine_us_data/calibration/target_config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ include:
164164
geo_level: national
165165
- variable: childcare_expenses
166166
geo_level: national
167+
- variable: employment_income_before_lsr
168+
geo_level: national
169+
- variable: nipa_proprietors_income
170+
geo_level: national
171+
- variable: interest_income
172+
geo_level: national
173+
- variable: dividend_income
174+
geo_level: national
167175
- variable: long_term_capital_gains
168176
geo_level: national
169177
- variable: medicaid

policyengine_us_data/db/etl_national_targets.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,16 @@
3030
target_variable_components,
3131
)
3232

33+
# Manually sourced national targets must be registered in both active target
34+
# systems: policyengine_us_data/utils/loss.py for ECPS calibration and this
35+
# file for policy_data.db/local H5 targets. If the default calibration should
36+
# train on the target, add it to the calibration/target_config.yaml include list.
3337
BEA_NIPA_WAGES_AND_SALARIES_2024 = 12_387_929_000_000
3438
BEA_NIPA_PROPRIETORS_INCOME_2024 = 2_023_080_000_000
3539
BEA_NIPA_PERSONAL_INTEREST_INCOME_2024 = 1_926_644_000_000
3640
BEA_NIPA_PERSONAL_DIVIDEND_INCOME_2024 = 2_218_700_000_000
3741

38-
NIPA_PROPRIETORS_INCOME_VARIABLE = (
39-
"total_self_employment_income+farm_operations_income+partnership_s_corp_income"
40-
)
42+
NIPA_PROPRIETORS_INCOME_VARIABLE = "nipa_proprietors_income"
4143
NIPA_PERSONAL_INTEREST_INCOME_VARIABLE = "interest_income"
4244
TAXABLE_INTEREST_AND_ORDINARY_DIVIDENDS_VARIABLE = (
4345
"taxable_interest_income+dividend_income"
@@ -453,9 +455,8 @@ def extract_national_targets(year: int = DEFAULT_YEAR):
453455
"notes": (
454456
"Proprietors' income with IVA and CCAdj for all persons, "
455457
"including nonfilers; FRED/BEA series A041RC1A027NBEA. "
456-
"Mapped to the closest additive PolicyEngine aggregate: "
457-
"total self-employment, farm operations, and "
458-
"partnership/S-corp income."
458+
"Mapped to the PolicyEngine-US NIPA proprietors' income "
459+
"aggregate."
459460
),
460461
"year": 2024,
461462
},

policyengine_us_data/storage/upload_completed_datasets.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from policyengine_core.data import Dataset
99

1010
from policyengine_us_data.__version__ import __version__ as DATA_PACKAGE_VERSION
11+
from policyengine_us_data.db.etl_national_targets import (
12+
BEA_NIPA_WAGES_AND_SALARIES_2024,
13+
)
1114
from policyengine_us_data.datasets import EnhancedCPS_2024
1215
from policyengine_us_data.datasets.cps.cps import CPS_2024
1316
from policyengine_us_data.datasets.cps.enhanced_cps import clone_diagnostics_path
@@ -125,7 +128,14 @@ class MicrosimulationAggregateCheck:
125128
]
126129

127130
# Aggregate thresholds for broad sanity checks (year 2024).
128-
MIN_EMPLOYMENT_INCOME_SUM = 5e12 # $5 trillion
131+
MIN_PLAUSIBLE_EMPLOYMENT_INCOME_SUM = 5e12 # $5 trillion
132+
NIPA_EMPLOYMENT_INCOME_TOLERANCE = 0.10
133+
MIN_ENHANCED_CPS_EMPLOYMENT_INCOME_SUM = BEA_NIPA_WAGES_AND_SALARIES_2024 * (
134+
1 - NIPA_EMPLOYMENT_INCOME_TOLERANCE
135+
)
136+
MAX_ENHANCED_CPS_EMPLOYMENT_INCOME_SUM = BEA_NIPA_WAGES_AND_SALARIES_2024 * (
137+
1 + NIPA_EMPLOYMENT_INCOME_TOLERANCE
138+
)
129139
MIN_HOUSEHOLD_WEIGHT_SUM = 100e6 # 100 million
130140
MAX_HOUSEHOLD_WEIGHT_SUM = 200e6 # 200 million
131141

@@ -152,9 +162,10 @@ class MicrosimulationAggregateCheck:
152162
"enhanced_cps_2024.h5": (
153163
MicrosimulationAggregateCheck(
154164
variable="employment_income",
155-
label="employment_income sum",
165+
label="employment_income sum vs NIPA wages target",
156166
statistic="sum",
157-
min_value=MIN_EMPLOYMENT_INCOME_SUM,
167+
min_value=MIN_ENHANCED_CPS_EMPLOYMENT_INCOME_SUM,
168+
max_value=MAX_ENHANCED_CPS_EMPLOYMENT_INCOME_SUM,
158169
),
159170
MicrosimulationAggregateCheck(
160171
variable="social_security_retirement",
@@ -188,7 +199,7 @@ class MicrosimulationAggregateCheck:
188199
variable="employment_income",
189200
label="employment_income sum",
190201
statistic="sum",
191-
min_value=MIN_EMPLOYMENT_INCOME_SUM,
202+
min_value=MIN_PLAUSIBLE_EMPLOYMENT_INCOME_SUM,
192203
),
193204
),
194205
}

policyengine_us_data/utils/loss.py

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,47 @@
3434

3535
# National calibration targets consumed by build_loss_matrix().
3636
# These values are specific to 2024 — they should NOT be applied to
37-
# other years without re-sourcing. They are duplicated in
38-
# db/etl_national_targets.py which loads them into policy_data.db.
39-
# A future PR should wire build_loss_matrix() to read from the
40-
# database so this dict can be deleted. See PR #488.
37+
# other years without re-sourcing. They must stay registered here for
38+
# ECPS calibration, in db/etl_national_targets.py for policy_data.db,
39+
# and in the calibration/target_config.yaml include list when the default
40+
# calibration should train on them. A future PR should wire
41+
# build_loss_matrix() to read from the database so this duplication can
42+
# be deleted. See PR #488.
4143

4244
BEA_NIPA_WAGES_AND_SALARIES_2024 = 12_387_929_000_000
4345
BEA_NIPA_PROPRIETORS_INCOME_2024 = 2_023_080_000_000
4446
BEA_NIPA_PERSONAL_INTEREST_INCOME_2024 = 1_926_644_000_000
4547
BEA_NIPA_PERSONAL_DIVIDEND_INCOME_2024 = 2_218_700_000_000
4648

47-
NIPA_PROPRIETORS_INCOME_VARIABLE = (
48-
"total_self_employment_income+farm_operations_income+partnership_s_corp_income"
49-
)
49+
NIPA_PROPRIETORS_INCOME_VARIABLE = "nipa_proprietors_income"
5050
NIPA_PERSONAL_INTEREST_INCOME_VARIABLE = "interest_income"
5151
TAXABLE_INTEREST_AND_ORDINARY_DIVIDENDS_VARIABLE = (
5252
"taxable_interest_income+dividend_income"
5353
)
5454

55+
BEA_NIPA_DIRECT_SUM_TARGETS = (
56+
(
57+
"nation/bea/nipa_wages_and_salaries",
58+
"employment_income_before_lsr",
59+
BEA_NIPA_WAGES_AND_SALARIES_2024,
60+
),
61+
(
62+
"nation/bea/nipa_proprietors_income",
63+
NIPA_PROPRIETORS_INCOME_VARIABLE,
64+
BEA_NIPA_PROPRIETORS_INCOME_2024,
65+
),
66+
(
67+
"nation/bea/nipa_personal_interest_income",
68+
NIPA_PERSONAL_INTEREST_INCOME_VARIABLE,
69+
BEA_NIPA_PERSONAL_INTEREST_INCOME_2024,
70+
),
71+
(
72+
"nation/bea/nipa_personal_dividend_income",
73+
"dividend_income",
74+
BEA_NIPA_PERSONAL_DIVIDEND_INCOME_2024,
75+
),
76+
)
77+
5578
CBO_INCOME_BY_SOURCE_TARGETS = [
5679
("irs_employment_income", "employment_income"),
5780
("self_employment_income", "self_employment_income"),
@@ -1249,29 +1272,7 @@ def build_loss_matrix(dataset: type, time_period):
12491272
)
12501273
targets_array.append(income_by_source._children[parameter])
12511274

1252-
bea_nipa_targets = [
1253-
(
1254-
"nation/bea/nipa_wages_and_salaries",
1255-
"employment_income_before_lsr",
1256-
BEA_NIPA_WAGES_AND_SALARIES_2024,
1257-
),
1258-
(
1259-
"nation/bea/nipa_proprietors_income",
1260-
NIPA_PROPRIETORS_INCOME_VARIABLE,
1261-
BEA_NIPA_PROPRIETORS_INCOME_2024,
1262-
),
1263-
(
1264-
"nation/bea/nipa_personal_interest_income",
1265-
NIPA_PERSONAL_INTEREST_INCOME_VARIABLE,
1266-
BEA_NIPA_PERSONAL_INTEREST_INCOME_2024,
1267-
),
1268-
(
1269-
"nation/bea/nipa_personal_dividend_income",
1270-
"dividend_income",
1271-
BEA_NIPA_PERSONAL_DIVIDEND_INCOME_2024,
1272-
),
1273-
]
1274-
for label, variable, target in bea_nipa_targets:
1275+
for label, variable, target in BEA_NIPA_DIRECT_SUM_TARGETS:
12751276
loss_matrix[label] = _calculate_household_target_values(
12761277
sim,
12771278
variable,

tests/unit/calibration/test_loss_targets.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
AGE_BUCKETED_HEALTH_TARGETS,
1111
AGGREGATE_LEVEL_TARGETED_VARIABLES,
1212
AGI_LEVEL_TARGETED_VARIABLES,
13+
BEA_NIPA_DIRECT_SUM_TARGETS,
1314
BLS_CE_TOTALS,
1415
HARD_CODED_TOTALS,
1516
TRANSFER_BALANCE_TARGETS,
@@ -32,13 +33,35 @@
3233
build_loss_matrix,
3334
get_target_error_normalisation,
3435
)
36+
from policyengine_us_data.db import etl_national_targets
3537

3638

3739
def test_legacy_loss_targets_include_aggregate_qbi_deduction():
3840
assert "qualified_business_income_deduction" in AGGREGATE_LEVEL_TARGETED_VARIABLES
3941
assert "qualified_business_income_deduction" not in AGI_LEVEL_TARGETED_VARIABLES
4042

4143

44+
def test_bea_nipa_direct_sum_targets_match_targets_db():
45+
loss_targets_by_variable = {
46+
variable: target for _, variable, target in BEA_NIPA_DIRECT_SUM_TARGETS
47+
}
48+
49+
assert loss_targets_by_variable == {
50+
"employment_income_before_lsr": (
51+
etl_national_targets.BEA_NIPA_WAGES_AND_SALARIES_2024
52+
),
53+
etl_national_targets.NIPA_PROPRIETORS_INCOME_VARIABLE: (
54+
etl_national_targets.BEA_NIPA_PROPRIETORS_INCOME_2024
55+
),
56+
etl_national_targets.NIPA_PERSONAL_INTEREST_INCOME_VARIABLE: (
57+
etl_national_targets.BEA_NIPA_PERSONAL_INTEREST_INCOME_2024
58+
),
59+
"dividend_income": (
60+
etl_national_targets.BEA_NIPA_PERSONAL_DIVIDEND_INCOME_2024
61+
),
62+
}
63+
64+
4265
def test_aca_targets_roll_forward_to_2025():
4366
targets, data_year = _load_aca_spending_and_enrollment_targets(2025)
4467

tests/unit/calibration/test_target_config.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
save_calibration_package,
1515
load_calibration_package,
1616
)
17+
from policyengine_us_data.db import etl_national_targets
1718

1819

1920
@pytest.fixture
@@ -345,6 +346,25 @@ def test_training_config_includes_medicare_part_b_target(self):
345346
"geo_level": "national",
346347
} in config["include"]
347348

349+
def test_training_config_includes_bea_nipa_direct_sum_targets(self):
350+
config = load_target_config(
351+
str(
352+
Path(__file__).resolve().parents[3]
353+
/ "policyengine_us_data"
354+
/ "calibration"
355+
/ "target_config.yaml"
356+
)
357+
)
358+
359+
include_rules = config["include"]
360+
for variable in [
361+
"employment_income_before_lsr",
362+
etl_national_targets.NIPA_PROPRIETORS_INCOME_VARIABLE,
363+
etl_national_targets.NIPA_PERSONAL_INTEREST_INCOME_VARIABLE,
364+
"dividend_income",
365+
]:
366+
assert {"variable": variable, "geo_level": "national"} in include_rules
367+
348368
def test_training_config_includes_soi_ltcg_target(self):
349369
config = load_target_config(
350370
str(

0 commit comments

Comments
 (0)