Add cross-year smoothness penalty to calibrate_local_areas (#345 step 5)

vahid-ahmadi · claude · vahid-ahmadi · commit b99d11db92ac · 2026-04-17T12:17:59.000+01:00
Adds an opt-in log-space L2 penalty to the training loss in
`calibrate_local_areas` that pulls the optimised weights towards a prior
year's weights. This is the regulariser that makes a sequence of
per-year calibrations statistically coherent as a panel — without it,
the same household can represent, say, 500 units in 2024 and 50 in 2025.

Design choices:

- The penalty is factored out into a pure helper
  `compute_log_weight_smoothness_penalty(log_weights, prior_weights)`
  so it can be unit-tested thoroughly. Entries where the prior is zero
  (households outside an area's country) are excluded from the mean so
  they neither pull nor inflate the penalty.
- `calibrate_local_areas` gains two keyword-only arguments,
  `prior_weights` and `smoothness_penalty`, whose defaults reproduce the
  pre-step-5 training loop exactly.
- Shape mismatches raise a clear `ValueError` rather than failing
  deep inside the optimiser.
- The penalty is computed from the underlying log-space weights (not
  the dropout-augmented tensor fed into the fit loss) so the regulariser
  does not double-count the dropout noise.

Tests (15 new, all in two files):

- 10 unit tests on the helper covering zero-when-equal, quadratic
  scaling, masking of zero-prior entries, gradient masking, the
  all-zero-prior case (returns zero, not NaN), shape validation,
  symmetric log deviation, differentiability, dtype round-trip and a
  hand-computed heterogeneous case.
- 5 integration tests on `calibrate_local_areas` with a three-household
  fake dataset: default kwargs (the pre-step-5 code path) still run and
  write the weight file, shape mismatch raises, `None` prior + penalty
  runs without error (smoke test for the no-op path), zero penalty +
  prior runs without error (smoke test for the no-op path), and a large
  penalty measurably pulls weights towards the prior versus a
  no-smoothness run.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/changelog.d/345.md b/changelog.d/345.md
@@ -1 +1 @@
-Add panel ID contract, `create_yearly_snapshots` helper, `age_dataset` demographic ageing module and year-aware loss matrices with a documented `resolve_target_value` fallback policy as the first four steps towards per-year snapshots (#345).
+Add panel ID contract, `create_yearly_snapshots` helper, `age_dataset` demographic ageing module, year-aware loss matrices with a documented `resolve_target_value` fallback policy, and a cross-year smoothness penalty on `calibrate_local_areas` as the first five steps towards per-year snapshots (#345).
diff --git a/policyengine_uk_data/tests/test_calibrate_smoothness_integration.py b/policyengine_uk_data/tests/test_calibrate_smoothness_integration.py
@@ -0,0 +1,196 @@
+"""Integration tests for the smoothness-penalty wiring in calibrate_local_areas.
+
+The unit tests for ``compute_log_weight_smoothness_penalty`` live in
+``test_smoothness_penalty.py``. The tests here exercise the surrounding
+plumbing: validation of the new kwargs, that default behaviour is
+unchanged, and that a large penalty actually pulls the optimised weights
+towards the prior.
+"""
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+from policyengine_uk.data import UKSingleYearDataset
+
+from policyengine_uk_data.utils import calibrate as calibrate_module
+from policyengine_uk_data.utils.calibrate import calibrate_local_areas
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+def _tiny_dataset() -> UKSingleYearDataset:
+    """Three-household dataset just big enough for calibration shapes."""
+    household = pd.DataFrame(
+        {
+            "household_id": [1, 2, 3],
+            "household_weight": [1000.0, 1000.0, 1000.0],
+        }
+    )
+    benunit = pd.DataFrame({"benunit_id": [101, 201, 301]})
+    person = pd.DataFrame(
+        {
+            "person_id": [1001, 2001, 3001],
+            "person_benunit_id": [101, 201, 301],
+            "person_household_id": [1, 2, 3],
+            "age": [30, 40, 50],
+        }
+    )
+    return UKSingleYearDataset(
+        person=person, benunit=benunit, household=household, fiscal_year=2025
+    )
+
+
+AREA_COUNT = 2
+
+
+def _fake_local_matrix(dataset):
+    """Two areas, three households, one target per area.
+
+    Each target is the sum of household_weight over the households in
+    that area. With default initial weights the target is easy to learn.
+    """
+    matrix = pd.DataFrame({"pop/area_size": [1.0, 1.0, 1.0]})
+    y = pd.DataFrame({"pop/area_size": [3000.0, 3000.0]})
+    # Simple country mask: both areas include all households.
+    r = np.ones((AREA_COUNT, 3))
+    return matrix, y, r
+
+
+def _fake_national_matrix(dataset):
+    matrix = pd.DataFrame({"pop/national": [1.0, 1.0, 1.0]})
+    y = pd.DataFrame({"pop/national": [6000.0]})
+    return matrix, y
+
+
+@pytest.fixture
+def patched_storage(tmp_path: Path, monkeypatch):
+    """Redirect the hard-coded STORAGE_FOLDER write in calibrate.py."""
+    monkeypatch.setattr(calibrate_module, "STORAGE_FOLDER", tmp_path)
+    return tmp_path
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_default_kwargs_reproduce_pre_step5_behaviour(patched_storage):
+    """No prior + zero penalty ⇒ the smoothness branch must be inert."""
+    # NB: calibrate_local_areas only flushes the weight file when the
+    # final epoch index is a multiple of 10 (the function saves every 10
+    # epochs). Use 11 epochs so the final epoch = 10 triggers a save.
+    np.random.seed(0)
+    import torch
+
+    torch.manual_seed(0)
+    calibrate_local_areas(
+        dataset=_tiny_dataset(),
+        matrix_fn=_fake_local_matrix,
+        national_matrix_fn=_fake_national_matrix,
+        area_count=AREA_COUNT,
+        weight_file="test_weights.h5",
+        epochs=11,
+    )
+    assert (patched_storage / "test_weights.h5").exists()
+
+
+def test_shape_mismatch_in_prior_raises(patched_storage):
+    bogus_prior = np.ones((AREA_COUNT, 99))  # wrong household count
+    with pytest.raises(ValueError, match="prior_weights shape"):
+        calibrate_local_areas(
+            dataset=_tiny_dataset(),
+            matrix_fn=_fake_local_matrix,
+            national_matrix_fn=_fake_national_matrix,
+            area_count=AREA_COUNT,
+            weight_file="test_weights.h5",
+            epochs=1,
+            prior_weights=bogus_prior,
+            smoothness_penalty=1.0,
+        )
+
+
+def test_none_prior_with_penalty_is_noop(patched_storage):
+    """A penalty coefficient without a prior must not crash."""
+    calibrate_local_areas(
+        dataset=_tiny_dataset(),
+        matrix_fn=_fake_local_matrix,
+        national_matrix_fn=_fake_national_matrix,
+        area_count=AREA_COUNT,
+        weight_file="test_weights.h5",
+        epochs=1,
+        prior_weights=None,
+        smoothness_penalty=10.0,
+    )
+
+
+def test_zero_penalty_with_prior_is_noop(patched_storage):
+    """A prior without a penalty coefficient must not crash either."""
+    prior = np.ones((AREA_COUNT, 3)) * 500.0
+    calibrate_local_areas(
+        dataset=_tiny_dataset(),
+        matrix_fn=_fake_local_matrix,
+        national_matrix_fn=_fake_national_matrix,
+        area_count=AREA_COUNT,
+        weight_file="test_weights.h5",
+        epochs=1,
+        prior_weights=prior,
+        smoothness_penalty=0.0,
+    )
+
+
+def test_large_penalty_keeps_weights_near_prior(patched_storage):
+    """With a huge penalty, the optimised weights should stay near the prior."""
+    import h5py
+
+    # Prior that is deliberately far from what the fit-loss alone would
+    # drive us to (fit alone wants ~1000 per household per area to match
+    # the area target; this prior has 10x larger values).
+    prior = np.ones((AREA_COUNT, 3)) * 10_000.0
+
+    np.random.seed(0)
+    import torch
+
+    torch.manual_seed(0)
+    calibrate_local_areas(
+        dataset=_tiny_dataset(),
+        matrix_fn=_fake_local_matrix,
+        national_matrix_fn=_fake_national_matrix,
+        area_count=AREA_COUNT,
+        weight_file="with_smoothness.h5",
+        # 21 epochs ⇒ final index 20 is a multiple of 10 → save triggers.
+        epochs=21,
+        prior_weights=prior,
+        smoothness_penalty=1e6,
+    )
+
+    with h5py.File(patched_storage / "with_smoothness.h5", "r") as f:
+        final_with = np.array(f["2025"])
+
+    # And the same run without the smoothness penalty.
+    np.random.seed(0)
+    torch.manual_seed(0)
+    calibrate_local_areas(
+        dataset=_tiny_dataset(),
+        matrix_fn=_fake_local_matrix,
+        national_matrix_fn=_fake_national_matrix,
+        area_count=AREA_COUNT,
+        weight_file="without_smoothness.h5",
+        epochs=21,
+    )
+
+    with h5py.File(patched_storage / "without_smoothness.h5", "r") as f:
+        final_without = np.array(f["2025"])
+
+    # With the huge penalty, weights should be closer (in log-space) to
+    # the prior than the no-smoothness run.
+    log_dev_with = np.mean((np.log(final_with + 1e-8) - np.log(prior)) ** 2)
+    log_dev_without = np.mean((np.log(final_without + 1e-8) - np.log(prior)) ** 2)
+    assert log_dev_with < log_dev_without, (
+        f"Smoothness failed to pull weights towards prior: "
+        f"with={log_dev_with:.4f} vs without={log_dev_without:.4f}"
+    )
diff --git a/policyengine_uk_data/tests/test_smoothness_penalty.py b/policyengine_uk_data/tests/test_smoothness_penalty.py
@@ -0,0 +1,107 @@
+"""Tests for the cross-year smoothness penalty (step 5 of #345)."""
+
+import pytest
+import torch
+
+from policyengine_uk_data.utils.calibrate import (
+    compute_log_weight_smoothness_penalty,
+)
+
+
+def test_zero_when_log_weights_match_log_prior():
+    """If current weights already equal the prior, the penalty is zero."""
+    prior = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    log_weights = torch.log(prior)
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    assert penalty.item() == pytest.approx(0.0)
+
+
+def test_penalty_scales_with_squared_log_deviation():
+    """A log-ratio of ln(2) on every entry → penalty = (ln 2)**2."""
+    prior = torch.ones(3, 4)
+    # log_weights = log(2 * prior) = log(2)
+    log_weights = torch.full((3, 4), float(torch.log(torch.tensor(2.0))))
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    assert penalty.item() == pytest.approx(
+        float(torch.log(torch.tensor(2.0))) ** 2, rel=1e-6
+    )
+
+
+def test_zero_prior_entries_are_excluded_from_mean():
+    """Households outside an area's country (prior == 0) must not inflate the penalty."""
+    prior = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
+    log_weights = torch.zeros_like(prior)  # log(1) on the valid entries
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    # Only two entries are valid and both match the prior → penalty is zero.
+    assert penalty.item() == pytest.approx(0.0)
+
+
+def test_zero_prior_entries_do_not_pull_gradient():
+    """Gradient w.r.t. a masked-out entry must be exactly zero."""
+    prior = torch.tensor([[0.0, 2.0]])
+    log_weights = torch.tensor([[100.0, 0.0]], requires_grad=True)
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    penalty.backward()
+    # First entry is masked out → grad should be zero regardless of value.
+    assert log_weights.grad[0, 0].item() == pytest.approx(0.0)
+    # Second entry pulled towards log(2).
+    assert log_weights.grad[0, 1].item() != 0.0
+
+
+def test_all_zero_prior_returns_zero_without_nan():
+    """No valid entries → zero, not NaN."""
+    prior = torch.zeros(2, 2)
+    log_weights = torch.randn(2, 2)
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    assert penalty.item() == 0.0
+    assert not torch.isnan(penalty)
+
+
+def test_shape_mismatch_raises_valueerror():
+    prior = torch.ones(3, 4)
+    log_weights = torch.zeros(3, 5)
+    with pytest.raises(ValueError, match="shape"):
+        compute_log_weight_smoothness_penalty(log_weights, prior)
+
+
+def test_symmetric_log_deviation():
+    """Doubling the prior and halving it produce the same penalty magnitude."""
+    prior = torch.ones(2, 2)
+    log_weights_double = torch.full((2, 2), float(torch.log(torch.tensor(2.0))))
+    log_weights_half = torch.full((2, 2), -float(torch.log(torch.tensor(2.0))))
+    a = compute_log_weight_smoothness_penalty(log_weights_double, prior)
+    b = compute_log_weight_smoothness_penalty(log_weights_half, prior)
+    assert a.item() == pytest.approx(b.item())
+
+
+def test_penalty_is_differentiable():
+    """The result must carry a grad so Adam can actually use it."""
+    prior = torch.ones(2, 3)
+    log_weights = torch.randn(2, 3, requires_grad=True)
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    assert penalty.requires_grad
+    penalty.backward()
+    assert log_weights.grad is not None
+    # Some entry must see a non-zero gradient for a non-trivial prior.
+    assert torch.any(log_weights.grad != 0)
+
+
+def test_device_and_dtype_round_trip():
+    """The output dtype matches the log_weights dtype (not the prior's)."""
+    prior = torch.ones(2, 2, dtype=torch.float32)
+    log_weights = torch.zeros(2, 2, dtype=torch.float64)
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    assert penalty.dtype == torch.float64
+
+
+def test_heterogeneous_mask_and_values():
+    """Explicit hand-computed example to lock in the arithmetic."""
+    # prior = [[1, 0], [4, e]]  ⇒  valid entries are (0,0), (1,0), (1,1).
+    e = float(torch.e)
+    prior = torch.tensor([[1.0, 0.0], [4.0, e]])
+    # log_weights = [[0, any], [0, 0]]  ⇒  deviations on valid entries
+    # are: (0 - log 1)=0,  (0 - log 4)=-2 log 2,  (0 - log e)=-1.
+    log_weights = torch.tensor([[0.0, 999.0], [0.0, 0.0]])
+    penalty = compute_log_weight_smoothness_penalty(log_weights, prior)
+    expected = (0.0**2 + (2 * torch.log(torch.tensor(2.0))).item() ** 2 + 1.0**2) / 3
+    assert penalty.item() == pytest.approx(expected, rel=1e-5)
diff --git a/policyengine_uk_data/utils/calibrate.py b/policyengine_uk_data/utils/calibrate.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-Add panel ID contract, `create_yearly_snapshots` helper, `age_dataset` demographic ageing module and year-aware loss matrices with a documented `resolve_target_value` fallback policy as the first four steps towards per-year snapshots (#345).
	`1`	+Add panel ID contract, `create_yearly_snapshots` helper, `age_dataset` demographic ageing module, year-aware loss matrices with a documented `resolve_target_value` fallback policy, and a cross-year smoothness penalty on `calibrate_local_areas` as the first five steps towards per-year snapshots (#345).