Skip to content

Commit f17b4cb

Browse files
authored
Migrate GREG calibration to svy (#1135)
1 parent 7e5a3f7 commit f17b4cb

8 files changed

Lines changed: 212 additions & 50 deletions

File tree

.github/workflows/pr.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,23 @@ jobs:
132132
env:
133133
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
134134

135+
calibration-extra-tests:
  name: Calibration extra tests
  runs-on: ubuntu-latest
  needs: [check-fork, lint]
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.14"
    - uses: astral-sh/setup-uv@v8.1.0
    - run: uv sync --dev --extra calibration
    # The regression test below calls pytest.importorskip("svy"), and pytest
    # exits 0 when every selected test is skipped. Import svy up front so a
    # missing or broken install fails this job instead of passing vacuously.
    - name: Verify svy is importable
      run: uv run --extra calibration python -c "import svy"
    - name: Run svy GREG adapter regression
      run: >
        uv run --extra calibration pytest
        tests/unit/test_long_term_calibration_contract.py::test_greg_calibrator_hits_linear_controls_with_svy
        -v
151+
135152
bundle-release-manifest-contract:
136153
name: Validate bundle release manifest contract
137154
runs-on: ubuntu-latest

changelog.d/1129.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Migrate long-term GREG calibration from archived samplics to svy.

policyengine_us_data/datasets/cps/long_term/assess_publishable_horizon.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from policyengine_us import Microsimulation
1313

1414
from calibration import build_calibration_audit, calibrate_weights
15+
from calibration import GregCalibrator
1516
from calibration_profiles import (
1617
approximate_window_for_year,
1718
classify_calibration_quality,
@@ -35,12 +36,6 @@
3536
set_long_term_target_source,
3637
)
3738

38-
try:
39-
from samplics.weighting import SampleWeight
40-
except ImportError: # pragma: no cover - only needed for greg profiles
41-
SampleWeight = None
42-
43-
4439
DEFAULT_BASE_DATASET_PATH = (
4540
"hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
4641
)
@@ -90,12 +85,7 @@ def parse_years(raw: str) -> list[int]:
9085
def maybe_build_calibrator(method: str):
9186
if method != "greg":
9287
return None
93-
if SampleWeight is None:
94-
raise ImportError(
95-
"samplics is required for GREG calibration. "
96-
"Install with: pip install policyengine-us-data[calibration]"
97-
)
98-
return SampleWeight()
88+
return GregCalibrator()
9989

10090

10191
def benchmark_tob_values(

policyengine_us_data/datasets/cps/long_term/calibration.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,66 @@
33
from scipy import optimize, sparse
44

55

6+
class GregCalibrator:
    """Small adapter around svy's GREG calibration workflow.

    Exposes ``calibrate(samp_weight=..., aux_vars=..., control=...)`` and
    delegates the calibration itself to ``svy``. ``polars`` and ``svy`` are
    imported inside ``__init__`` so that importing this module does not
    require the optional ``calibration`` extra.
    """

    # Scratch columns added to the frame handed to svy: the base weights go
    # in, the calibrated weights are read back out.
    _base_weight_column = "_policyengine_base_weight"
    _calibrated_weight_column = "_policyengine_greg_weight"

    def __init__(self):
        """Import the optional backends and keep references to them.

        Raises:
            ImportError: If ``polars`` or ``svy`` cannot be imported.
        """
        try:
            import polars as pl
            import svy
        except ImportError as e:  # pragma: no cover - exercised without extra
            raise ImportError(
                "svy is required for GREG calibration. "
                "Install with: pip install policyengine-us-data[calibration]"
            ) from e

        self._pl = pl
        self._svy = svy

    def calibrate(self, *, samp_weight, aux_vars, control):
        """Calibrate ``samp_weight`` against ``control`` using svy.

        Args:
            samp_weight: Base weights; converted to a float array and stored
                as the design weight column.
            aux_vars: Auxiliary variables as a pandas DataFrame, a scipy
                sparse matrix, or anything ``np.asarray`` accepts.
            control: Mapping of control name to target value. Names are
                coerced to ``str`` and targets to ``float``.

        Returns:
            A float numpy array read back from the calibrated weight column.

        Raises:
            ValueError: If ``aux_vars`` does not line up with ``control`` or
                already uses one of the adapter's scratch column names.
        """
        control = {str(name): float(target) for name, target in control.items()}
        aux_df = self._auxiliary_dataframe(aux_vars, list(control))

        # Refuse to silently overwrite caller data with the scratch columns.
        reserved = (self._base_weight_column, self._calibrated_weight_column)
        clashes = [name for name in reserved if name in aux_df.columns]
        if clashes:
            raise ValueError(
                "aux_vars uses column names reserved by GregCalibrator: "
                + ", ".join(clashes)
            )

        aux_df[self._base_weight_column] = np.asarray(samp_weight, dtype=float)

        sample = self._svy.Sample(
            self._pl.from_pandas(aux_df),
            design=self._svy.Design(wgt=self._base_weight_column),
        )
        sample.weighting.calibrate(
            controls=control,
            wgt_name=self._calibrated_weight_column,
        )
        return (
            sample.data.get_column(self._calibrated_weight_column)
            .to_numpy()
            .astype(float)
        )

    def _auxiliary_dataframe(self, aux_vars, control_names):
        """Return ``aux_vars`` as a pandas DataFrame with string column names.

        A DataFrame input is copied (the caller's frame is not modified) and
        must contain a column for every entry of ``control_names``; extra
        columns are kept. Any other input is densified, promoted to 2-D if it
        is 1-D, cast to float, and labelled positionally with
        ``control_names``.

        Raises:
            ValueError: If a DataFrame lacks a control column, or an array
                input's column count differs from ``len(control_names)``.
        """
        if isinstance(aux_vars, pd.DataFrame):
            aux_df = aux_vars.copy()
            aux_df.columns = [str(column) for column in aux_df.columns]
            # Fail here with a clear message rather than deep inside svy.
            missing = [name for name in control_names if name not in aux_df.columns]
            if missing:
                raise ValueError(
                    "aux_vars is missing columns for GREG controls: "
                    + ", ".join(missing)
                )
            return aux_df

        if sparse.issparse(aux_vars):
            aux_array = aux_vars.toarray()
        else:
            aux_array = np.asarray(aux_vars)

        if aux_array.ndim == 1:
            # A single auxiliary variable passed as a flat vector.
            aux_array = aux_array.reshape(-1, 1)

        if aux_array.shape[1] != len(control_names):
            raise ValueError(
                "aux_vars column count must match the number of GREG controls"
            )

        return pd.DataFrame(aux_array.astype(float), columns=control_names)
64+
65+
666
def _pct_error(achieved, target):
767
if target == 0:
868
return 0.0 if achieved == 0 else float("inf")
@@ -106,10 +166,10 @@ def calibrate_greg(
106166
n_ages=86,
107167
):
108168
"""
109-
Calibrate weights using GREG method via samplics.
169+
Calibrate weights using GREG method via svy.
110170
111171
Args:
112-
calibrator: SampleWeight instance from samplics
172+
calibrator: GregCalibrator instance
113173
X: Design matrix (n_households x n_ages)
114174
y_target: Target age distribution
115175
baseline_weights: Initial household weights

policyengine_us_data/datasets/cps/long_term/run_household_projection.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
set_long_term_target_source,
5757
)
5858
from calibration import (
59+
GregCalibrator,
5960
build_calibration_audit,
6061
build_clone_donor_component_weight_concentration_audit,
6162
build_clone_donor_family_weight_concentration_audit,
@@ -787,14 +788,7 @@ def _compose_reforms(*reforms):
787788
from ssa_data import load_hi_tob_projections, load_oasdi_tob_projections
788789

789790
if USE_GREG:
790-
try:
791-
from samplics.weighting import SampleWeight
792-
except ImportError:
793-
raise ImportError(
794-
"samplics is required for GREG calibration. "
795-
"Install with: pip install policyengine-us-data[calibration]"
796-
)
797-
calibrator = SampleWeight()
791+
calibrator = GregCalibrator()
798792
else:
799793
calibrator = None
800794

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ dependencies = [
5454

5555
[project.optional-dependencies]
5656
calibration = [
57-
"samplics",
57+
"svy>=0.18.2",
5858
]
5959
l0 = [
6060
"l0-python",

tests/unit/test_long_term_calibration_contract.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222
from policyengine_us_data.datasets.cps.long_term import run_long_term_production
2323
from policyengine_us_data.datasets.cps.long_term.calibration import (
24+
GregCalibrator,
2425
assess_nonnegative_feasibility,
2526
build_calibration_audit,
2627
calibrate_entropy,
@@ -1107,6 +1108,29 @@ def test_strict_greg_failure_raises():
11071108
)
11081109

11091110

1111+
def test_greg_calibrator_hits_linear_controls_with_svy():
    """GREG through the svy adapter reproduces the linear control targets.

    Skipped when the optional ``svy`` package is not importable.
    """
    pytest.importorskip("svy")

    design = np.array([[1.0, 0.0], [0.0, 1.0]])
    targets = np.array([2.0, 3.0])
    starting_weights = np.array([1.0, 1.0])

    result = calibrate_weights(
        X=design,
        y_target=targets,
        baseline_weights=starting_weights,
        method="greg",
        calibrator=GregCalibrator(),
        n_ages=2,
        allow_fallback_to_ipf=False,
    )
    weights, iterations, audit = result

    # IPF fallback is disabled, so the audit must attribute the result to GREG.
    assert iterations == 1
    assert audit["method_used"] == "greg"
    assert audit["fell_back_to_ipf"] is False
    np.testing.assert_allclose(design.T @ weights, targets)
1132+
1133+
11101134
def test_build_calibration_audit_reports_constraint_error():
11111135
X = np.array([[1.0, 0.0], [0.0, 1.0]])
11121136
y_target = np.array([1.0, 1.0])

0 commit comments

Comments
 (0)