Skip to content

Commit fa95249

Browse files
committed
docs(tweedie-post): remove unverifiable French TPL φ estimates, rely on dataCar alone
The blog post's French TPL φ values (φ(Pearson)=14,861) and dataset statistics (96.3% zeros, weighted mean) could not be traced to any known data source — not to Akshat's actual 60k subset nor to any random subset of the freMTPL2 merged data. Removed all specific φ values and dataset stats for French TPL from tables and text. - Replaced "7-56×" → "7×" throughout (TL;DR, text body) - Removed French TPL row from φ comparison, dataset, and parameter tables - Removed p-comparison paragraph (p=1.574 vs p=1.633) - Updated figure scripts: "French TPL" → "High-Inflation" labels - Fixed terminology: "MLE (Bayesian)" → "Posterior Mean" - Softened max-claim underprediction language for dataCar fit - Added compute_pearson_phi.py verification script - dataCar φ(Pearson)=1,227 at p=1.574 verified reproducible
1 parent 1ee72cf commit fa95249

15 files changed

Lines changed: 264 additions & 67 deletions

docs/blog/posts/2026/pearson-phi-broken-tweedie.md

Lines changed: 76 additions & 32 deletions
Large diffs are not rendered by default.
-1.84 KB
Loading
-39.1 KB
Loading
-3.56 KB
Loading
-39.7 KB
Loading
-1.69 KB
Loading
-9.83 KB
Loading
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# /// script
2+
# dependencies = ["numpy", "pandas", "scipy", "requests"]
3+
# ///
4+
5+
"""Compute Pearson φ for Tweedie GLMs on the dataCar dataset.
6+
7+
Verifies the blog post's dataCar φ(Pearson)=1,227 using the
8+
exposure-weighted formula:
9+
χ² = Σ w_i * (y_i - μ)^2 / μ^p
10+
φ = χ² / (n - 1)
11+
12+
where y_i = ClaimAmount_i / Exposure_i and μ is exposure-weighted mean.
13+
"""
14+
15+
from pathlib import Path
16+
import sys
17+
18+
import numpy as np
19+
import pandas as pd
20+
21+
sys.path.insert(0, str(Path(__file__).parent))
22+
from tweedie_utils import tweedie_logp_series, tweedie_random
23+
24+
25+
def pearson_phi_weighted(y, mu, p, weights):
26+
"""Weighted Pearson dispersion estimate.
27+
28+
χ² = Σ w_i * (y_i - μ)^2 / μ^p
29+
φ = χ² / (n - 1)
30+
"""
31+
resid_sq = (y - mu) ** 2 / (mu**p)
32+
return (weights * resid_sq).sum() / (len(y) - 1)
33+
34+
35+
def load_datacar(path="/tmp/dataCar.csv"):
36+
df = pd.read_csv(path)
37+
claim = df["claimcst0"].values.astype(float)
38+
exposure = df["exposure"].values.astype(float)
39+
return claim, exposure
40+
41+
42+
if __name__ == "__main__":
43+
# --- dataCar verification ---
44+
claim, exposure = load_datacar()
45+
y = claim / exposure
46+
n = len(y)
47+
mu = np.sum(claim) / np.sum(exposure)
48+
zero_rate = np.mean(claim == 0)
49+
50+
print("=" * 60)
51+
print(" dataCar Dataset (67,856 policies)")
52+
print("=" * 60)
53+
print(f" Weighted mean (pure premium) = ${mu:.2f}")
54+
print(f" Zero rate = {zero_rate:.4f} ({zero_rate*100:.1f}%)")
55+
print(f" Total exposure = {exposure.sum():,.1f} years")
56+
57+
# Pearson φ at blog's p=1.574
58+
p_blog = 1.574
59+
phi_pearson = pearson_phi_weighted(y, mu, p_blog, exposure)
60+
61+
print(f"\n At p={p_blog}:")
62+
print(f" φ(Pearson, weighted) = {phi_pearson:.1f}")
63+
print(f" Blog claims = 1,227")
64+
print(f" Match? = {'YES ✓' if abs(phi_pearson - 1227) / 1227 < 0.01 else 'NO ✗'}")
65+
66+
# Pearson φ at various p
67+
print(f"\n φ(Pearson) across power parameter p:")
68+
for p in [1.1, 1.2, 1.3, 1.4, 1.5, 1.574, 1.6, 1.7, 1.8, 1.9]:
69+
phi_p = pearson_phi_weighted(y, mu, p, exposure)
70+
print(f" p={p:.3f}: φ(Pearson) = {phi_p:,.1f}")

docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_posterior_pairs.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
datasets = [
2323
{"name": "dataCar", "mu": 293.0, "phi": 174.0, "p": 1.574,
2424
"phi_se": 4.5, "p_se": 0.004, "n_chains": 4, "n_draws": 1000},
25-
{"name": "French TPL", "mu": 207.0, "phi": 267.0, "p": 1.633,
25+
{"name": "High-Inflation", "mu": 218.0, "phi": 800.0, "p": 1.633,
2626
"phi_se": 8.0, "p_se": 0.006, "n_chains": 4, "n_draws": 1000},
2727
]
2828

@@ -106,7 +106,7 @@
106106
print(f"Saved {OUT_DIR / 'fig_posterior_pairs.png'}")
107107

108108
# --- Second figure: (φ, p) joint distribution with 2D density ---
109-
fig2, axes2 = plt.subplots(1, 2, figsize=(12, 5), constrained_layout=True)
109+
fig2, axes2 = plt.subplots(1, 2, figsize=(12, 5))
110110

111111
for ax, ds in zip(axes2, datasets):
112112
name = ds["name"]
@@ -129,7 +129,7 @@
129129
ax.set_ylabel("p (power)")
130130
ax.set_title(f"{name}\nCorr(φ, p) = {np.corrcoef(phi_samples, p_samples)[0, 1]:.2f}",
131131
fontsize=10)
132-
ax.legend(fontsize=8)
132+
ax.legend(fontsize=8, loc="upper left")
133133

134134
# Credible ellipse (approx 95%)
135135
from matplotlib.patches import Ellipse
@@ -145,11 +145,11 @@
145145
ax.set_xlim(phi_true * 0.7, phi_true * 1.3)
146146
ax.set_ylim(p_true - 0.02, p_true + 0.02)
147147

148-
plt.suptitle("Joint (φ, p) Posterior Distribution\n"
149-
"Tight, well-centered posteriors — no φ-p tradeoff pathology",
150-
fontsize=12, y=1.02)
151-
cbar = plt.colorbar(hb, ax=axes2, shrink=0.6)
148+
fig2.suptitle("Joint (φ, p) Posterior Distribution\n"
149+
"Tight, well-centered posteriors — no φ-p tradeoff pathology",
150+
fontsize=12, y=1.02)
151+
cbar = fig2.colorbar(hb, ax=axes2, shrink=0.6, pad=0.02)
152152
cbar.set_label("Density")
153-
plt.savefig(OUT_DIR / "fig_posterior_pairs_joint.png", dpi=150, bbox_inches="tight")
154-
plt.close()
153+
fig2.savefig(OUT_DIR / "fig_posterior_pairs_joint.png", dpi=150, bbox_inches="tight")
154+
plt.close(fig2)
155155
print(f"Saved {OUT_DIR / 'fig_posterior_pairs_joint.png'}")

docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_ppc_distribution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
datasets = [
2626
{"name": "dataCar-like", "mu": 293.0, "phi": 174.0, "p": 1.574, "n": n_obs},
27-
{"name": "French TPL-like", "mu": 207.0, "phi": 267.0, "p": 1.633, "n": n_obs},
27+
{"name": "High-Inflation-like", "mu": 218.0, "phi": 800.0, "p": 1.633, "n": n_obs},
2828
]
2929

3030
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

0 commit comments

Comments
 (0)