Skip to content

Commit 5f25eb4

Browse files
authored
Merge pull request #2708 from PolicyEngine/enhanced-cps-full-launch
Add blog post announcing Enhanced CPS full launch
2 parents 4e96c4e + 308da36 commit 5f25eb4

6 files changed

Lines changed: 121 additions & 128 deletions

File tree

162 KB
Loading

src/__tests__/pages/policy/PolicyRightSidebar.test.js

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { fireEvent, render, waitFor } from "@testing-library/react";
1+
import { render, waitFor } from "@testing-library/react";
22
import { BrowserRouter, useSearchParams } from "react-router-dom";
33
import PolicyRightSidebar, {
44
SinglePolicyChange,
@@ -48,7 +48,7 @@ const standardPolicyUK = {
4848
};
4949

5050
describe("Enhanced CPS selector", () => {
51-
test("Should be present for the US site", async () => {
51+
test("Should not be present (selector removed)", async () => {
5252
const testSearchParams = {
5353
focus: "gov",
5454
};
@@ -67,10 +67,10 @@ describe("Enhanced CPS selector", () => {
6767
defaultOpen: true,
6868
};
6969

70-
const { getByTestId } = render(<PolicyRightSidebar {...props} />);
70+
const { queryByTestId } = render(<PolicyRightSidebar {...props} />);
7171

7272
await waitFor(() => {
73-
expect(getByTestId("enhanced_cps_switch")).toBeInTheDocument();
73+
expect(queryByTestId("enhanced_cps_switch")).not.toBeInTheDocument();
7474
});
7575
});
7676
test("Should not render for the UK site", async () => {
@@ -98,7 +98,7 @@ describe("Enhanced CPS selector", () => {
9898
expect(queryByTestId("enhanced_cps_switch")).not.toBeInTheDocument();
9999
});
100100
});
101-
test("Should be enabled when region is 'us'", async () => {
101+
test("Should not be present when region is 'us' (selector removed)", async () => {
102102
const testSearchParams = {
103103
focus: "gov",
104104
region: "us",
@@ -118,13 +118,11 @@ describe("Enhanced CPS selector", () => {
118118
defaultOpen: true,
119119
};
120120

121-
const { getByTestId } = render(<PolicyRightSidebar {...props} />);
121+
const { queryByTestId } = render(<PolicyRightSidebar {...props} />);
122122

123-
expect(getByTestId("enhanced_cps_switch").classList).not.toContain(
124-
"ant-switch-disabled",
125-
);
123+
expect(queryByTestId("enhanced_cps_switch")).not.toBeInTheDocument();
126124
});
127-
test("Should be enabled when region is 'null'", async () => {
125+
test("Should not be present when region is 'null' (selector removed)", async () => {
128126
const testSearchParams = {
129127
focus: "gov",
130128
};
@@ -143,24 +141,16 @@ describe("Enhanced CPS selector", () => {
143141
defaultOpen: true,
144142
};
145143

146-
const { getByTestId } = render(<PolicyRightSidebar {...props} />);
144+
const { queryByTestId } = render(<PolicyRightSidebar {...props} />);
147145

148-
expect(getByTestId("enhanced_cps_switch").classList).not.toContain(
149-
"ant-switch-disabled",
150-
);
146+
expect(queryByTestId("enhanced_cps_switch")).not.toBeInTheDocument();
151147
});
152-
test("Should change region when selected", () => {
148+
test("Should not have selector to change dataset (selector removed)", () => {
153149
const testSearchParams = {
154150
focus: "gov",
155151
region: "us",
156152
};
157153

158-
const expectedSearchParams = {
159-
focus: "gov",
160-
region: "us",
161-
dataset: "enhanced_cps",
162-
};
163-
164154
const mockSetSearchParams = jest.fn();
165155

166156
useSearchParams.mockImplementation(() => {
@@ -177,11 +167,8 @@ describe("Enhanced CPS selector", () => {
177167
defaultOpen: true,
178168
};
179169

180-
const { getByTestId } = render(<PolicyRightSidebar {...props} />);
181-
fireEvent.click(getByTestId("enhanced_cps_switch"));
182-
expect(mockSetSearchParams).toHaveBeenCalledWith(
183-
new URLSearchParams(expectedSearchParams),
184-
);
170+
const { queryByTestId } = render(<PolicyRightSidebar {...props} />);
171+
expect(queryByTestId("enhanced_cps_switch")).not.toBeInTheDocument();
185172
});
186173
});
187174

2.3 MB
Loading

src/pages/policy/PolicyRightSidebar.jsx

Lines changed: 0 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -456,100 +456,6 @@ function FullLiteToggle() {
456456
* @param {Number|String} timePeriod The year the simulation should run over
457457
* @returns {import("react").ReactElement}
458458
*/
459-
function DatasetSelector(props) {
460-
const { presentDataset, timePeriod } = props;
461-
const [isChecked, setIsChecked] = useState(confirmIsChecked(presentDataset));
462-
const [searchParams, setSearchParams] = useSearchParams();
463-
const displayCategory = useDisplayCategory();
464-
465-
function confirmIsChecked(presentDataset) {
466-
// Define presentDataset value that activates check
467-
const checkValue = "enhanced_cps";
468-
if (presentDataset === checkValue) {
469-
return true;
470-
}
471-
return false;
472-
}
473-
474-
// Determine whether slider should be enabled or disabled
475-
function shouldEnableSlider(timePeriod) {
476-
// Define earliest year slider should be shown for
477-
const sliderStartYear = 2024;
478-
479-
// Return whether or not slider should be enabled
480-
// Null timePeriod reflects no URL param setting yet -
481-
// this is actually default behavior
482-
if (!timePeriod || timePeriod >= sliderStartYear) {
483-
return true;
484-
}
485-
486-
return false;
487-
}
488-
489-
function handleChange() {
490-
// First, safety check - if the button isn't even
491-
// supposed to be shown, do nothing
492-
if (!shouldEnableSlider(timePeriod)) {
493-
return;
494-
}
495-
496-
// Duplicate the existing search params
497-
let newSearch = copySearchParams(searchParams);
498-
499-
// Set params accordingly
500-
if (isChecked) {
501-
newSearch.delete("dataset");
502-
setIsChecked(false);
503-
} else {
504-
newSearch.set("dataset", "enhanced_cps");
505-
setIsChecked(true);
506-
}
507-
setSearchParams(newSearch);
508-
}
509-
510-
return (
511-
<div
512-
style={{
513-
display: "flex",
514-
flexDirection: "row",
515-
justifyContent: "flex-start",
516-
alignItems: "center",
517-
gap: "10px",
518-
}}
519-
>
520-
<Switch
521-
data-testid="enhanced_cps_switch"
522-
size={displayCategory !== "mobile" && "small"}
523-
onChange={handleChange}
524-
disabled={!shouldEnableSlider(timePeriod)}
525-
checked={presentDataset === "enhanced_cps" ? true : false}
526-
/>
527-
<p
528-
style={{
529-
margin: 0,
530-
fontSize: displayCategory !== "mobile" && "0.95em",
531-
color: !shouldEnableSlider(timePeriod) && "rgba(0,0,0,0.5)",
532-
cursor: !shouldEnableSlider(timePeriod) && "not-allowed",
533-
}}
534-
>
535-
Use Enhanced CPS (beta)
536-
</p>
537-
<Tooltip
538-
placement="topRight"
539-
title="Currently available for US-wide simulations only."
540-
trigger={displayCategory === "mobile" ? "click" : "hover"}
541-
>
542-
<QuestionCircleOutlined
543-
style={{
544-
color: "rgba(0, 0, 0, 0.85)",
545-
opacity: 0.85,
546-
cursor: "pointer",
547-
}}
548-
/>
549-
</Tooltip>
550-
</div>
551-
);
552-
}
553459

554460
function PolicyNamer(props) {
555461
const { policy, metadata } = props;
@@ -838,8 +744,6 @@ export default function PolicyRightSidebar(props) {
838744

839745
const isMultiYear = determineIfMultiYear(searchParams);
840746

841-
let dataset = searchParams.get("dataset");
842-
843747
const options = metadata.economy_options.region.map((stateAbbreviation) => {
844748
return { value: stateAbbreviation.name, label: stateAbbreviation.label };
845749
});
@@ -1138,12 +1042,6 @@ export default function PolicyRightSidebar(props) {
11381042
}}
11391043
/>
11401044
</div>
1141-
{metadata.countryId === "us" && (
1142-
<DatasetSelector
1143-
presentDataset={dataset}
1144-
timePeriod={timePeriod}
1145-
/>
1146-
)}
11471045
{MULTI_YEAR_SELECTOR_PERMITTED_COUNTRIES.includes(
11481046
metadata.countryId,
11491047
) && (
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
We're excited to announce the full launch of the Enhanced Current Population Survey (Enhanced CPS). This comprehensive dataset powers PolicyEngine's microsimulation modeling with unprecedented accuracy, enabling precise analysis of tax and benefit reforms across the United States.
2+
3+
Building on extensive development and testing since our [beta launch](/us/research/enhanced-cps-beta), the Enhanced CPS now includes sophisticated imputation models for income sources and household characteristics that were previously unavailable or underreported in survey data. These improvements provide a more complete picture of American households' economic circumstances. Learn more about our [methodology](https://policyengine.github.io/policyengine-us-data/methodology) and [data sources](https://policyengine.github.io/policyengine-us-data/data).
4+
5+
## New features
6+
7+
### Income source imputations
8+
9+
The Enhanced CPS now includes machine learning-based imputations for income sources that are frequently underreported in surveys:
10+
11+
**Tip income**: Using employer-reported data from the Survey of Income and Program Participation (SIPP), we impute tip income based on employment income, age, and household composition. This enhancement enables accurate analysis of proposals to exempt tips from taxation. See our [imputation methodology documentation](https://policyengine.github.io/policyengine-us-data/methodology#imputation) for technical details.
12+
13+
**Overtime premiums**: We calculate overtime income using hours worked, occupation codes, and Fair Labor Standards Act exemption status, allowing accurate modeling of overtime exemption proposals.
14+
15+
**Auto loan interest**: Imputed from the Survey of Consumer Finances (SCF), this addition enables analysis of proposals to make auto loan interest deductible.
16+
17+
### Immigration status imputation
18+
19+
We've implemented the ASEC Undocumented Algorithm to impute Social Security Number card types, enabling more accurate modeling of policies with citizenship or work authorization requirements. This process-of-elimination approach examines 14 conditions to identify likely undocumented individuals, calibrated to match external population estimates. Details are available in our [demographic imputation section](https://policyengine.github.io/policyengine-us-data/methodology#demographic-imputation).
20+
21+
### Technical infrastructure improvements
22+
23+
**Two-stage methodology**: Our approach combines sophisticated imputation with advanced reweighting techniques. First, we use Quantile Regression Forests (QRF) to impute missing variables from multiple data sources, preserving realistic variation and capturing conditional distribution tails. Second, we apply gradient-based optimization with PyTorch to reweight households, matching administrative targets while maintaining the survey's statistical properties.
24+
25+
![Enhanced CPS methodology flowchart showing the two-stage process of imputation and reweighting](/images/posts/enhanced-cps-launch-flowchart.png)
26+
27+
The process flow integrates five source datasets (CPS ASEC, IRS PUF, SIPP, SCF, ACS) that are aged to the target year. Through QRF imputation, we create two enhanced CPS variants: one with missing PUF variables filled in, and another with existing variables replaced by PUF values. These datasets then undergo reweighting optimization to produce the final Enhanced CPS dataset. [View our detailed methodology documentation](https://policyengine.github.io/policyengine-us-data/methodology).
28+
29+
**Microimpute package**: We've developed and adopted [`microimpute`](https://github.com/PolicyEngine/microimpute), a new open-source Python package that automates our QRF-based imputation methods. This package makes our imputation methodology more transparent and reusable.
30+
31+
**Advanced reweighting with L0 regularization**: Our reweighting process uses log-transformed weights with dropout regularization and incorporates an L0 penalty for sparsity. This ensures positive weights while preventing overfitting and maintaining interpretability. The optimization minimizes mean squared relative error using the Adam optimizer.
32+
33+
**Enhanced validation**: Our [calibration process](https://policyengine.github.io/policyengine-us-data/methodology#calibration) targets 9,168 administrative totals from sources including IRS SOI, Census, CBO/Treasury, and JCT data, ensuring the Enhanced CPS accurately represents:
34+
35+
- Income components by source
36+
- Benefit program enrollment
37+
- Demographic distributions
38+
- Geographic population counts
39+
40+
## Upcoming developments
41+
42+
### State and local calibration
43+
44+
With support from [Arnold Ventures](https://www.arnoldventures.org/), we're extending the Enhanced CPS to provide accurate estimates for every state and congressional district. This follows our successful implementation of local-area microsimulation in the UK, funded by the [Nuffield Foundation](https://www.nuffieldfoundation.org/).
45+
46+
Once this calibration is complete, the Enhanced CPS will become the default for state-level analysis as well, and PolicyEngine users will be able to analyze the impacts of federal and state policy reforms on:
47+
48+
- Poverty rates by state and congressional district
49+
- Income inequality measures for local areas
50+
- Winners and losers from reforms in specific districts
51+
- Distributional impacts by income decile for each state
52+
53+
### Microcalibrate package
54+
55+
We're developing [`microcalibrate`](https://github.com/PolicyEngine/microcalibrate), a next-generation reweighting package that enhances our current gradient descent approach. This package will offer:
56+
57+
- Faster convergence to calibration targets
58+
- Better preservation of the original survey's covariance structure
59+
- More flexible loss functions for different use cases
60+
- Easier extension to multi-area calibration
61+
62+
### Additional data enhancements
63+
64+
We currently integrate data from the Survey of Consumer Finances (for auto loan interest) and the American Community Survey (for housing costs). Future enhancements will expand these integrations:
65+
66+
- **Wealth modeling from SCF**: Comprehensive asset and debt data for modeling asset limits in SNAP, SSI, and other means-tested programs, similar to our [wealth modeling in the UK](https://policyengine.org/uk/research/uk-the-new-policyengine)
67+
- **Consumer Expenditure Survey**: Consumption patterns for modeling sales taxes, carbon pricing, and other consumption-based policies
68+
- **Expanded ACS integration**: Additional geographic and demographic detail for state and local policy analysis
69+
70+
## Using the Enhanced CPS
71+
72+
The Enhanced CPS is now the exclusive dataset for nationwide PolicyEngine US analyses. We've removed the dataset selector to streamline the user experience—the Enhanced CPS automatically powers all nationwide simulations, while state-specific analyses continue to use the standard CPS until our local calibration is complete.
73+
74+
You can access the Enhanced CPS through:
75+
76+
**Web interface**: The Enhanced CPS powers all nationwide calculations at [policyengine.org/us](https://policyengine.org/us).
77+
78+
**Python package**: The Enhanced CPS is the default dataset for our `Microsimulation` calls.
79+
80+
**Direct download**: For Python users, the data automatically downloads from our Hugging Face repository when you instantiate a simulation. The files are stored at [`hf://policyengine/policyengine-us-data`](https://huggingface.co/policyengine/policyengine-us-data).
81+
82+
## Technical details
83+
84+
For researchers interested in our methodology:
85+
86+
- **Full technical documentation**: [PolicyEngine US Data documentation](https://policyengine.github.io/policyengine-us-data)
87+
- **Data integration methodology**: [Imputation and fusion techniques](https://policyengine.github.io/policyengine-us-data/methodology#data-fusion)
88+
- **Calibration approach**: [Reweighting methodology](https://policyengine.github.io/policyengine-us-data/methodology#reweighting)
89+
- **Validation results**: [Comparison with administrative data](https://policyengine.github.io/policyengine-us-data/discussion)
90+
- **Implementation code**: [Microimpute package](https://github.com/PolicyEngine/microimpute)
91+
- **Source code**: [Enhanced CPS on GitHub](https://github.com/PolicyEngine/policyengine-us-data/tree/main/policyengine_us_data/datasets/cps)
92+
93+
## Conclusion
94+
95+
The Enhanced CPS represents a major advancement in open-source microsimulation data. By [combining the demographic richness of the Current Population Survey with tax detail from IRS records](https://policyengine.github.io/policyengine-us-data/background) and sophisticated imputation techniques, we've created a dataset that supports comprehensive analysis of both tax and benefit policies.
96+
97+
As we expand to state and local calibration, the Enhanced CPS will enable unprecedented granularity in policy analysis—empowering lawmakers, researchers, and citizens to understand how proposed reforms would affect their communities.
98+
99+
We welcome feedback and collaboration as we continue improving this foundational infrastructure for evidence-based policymaking. For questions or to contribute to development, please visit our [GitHub repositories](https://github.com/PolicyEngine) or [contact us](mailto:hello@policyengine.org).

src/posts/posts.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
[
2+
{
3+
"title": "Enhanced CPS full launch: Comprehensive microdata for policy analysis",
4+
"description": "The Enhanced Current Population Survey now includes tip, overtime, and auto loan interest imputations, plus upcoming state and congressional district calibration.",
5+
"date": "2025-08-08",
6+
"tags": ["us", "data", "featured"],
7+
"filename": "enhanced-cps-launch.md",
8+
"image": "enhanced-cps-launch.png",
9+
"authors": ["max-ghenis", "nikhil-woodruff"]
10+
},
211
{
312
"title": "Analysis of individual income tax provisions in the final reconciliation bill",
413
"description": "Our simulation projects a reduction in federal revenues of $3.8 trillion from 2026 to 2035 compared to current law.",

0 commit comments

Comments
 (0)