williambdean
diff --git a/‎docs/blog/posts/2026/pearson-phi-broken-tweedie.md‎
Lines changed: 76 additions & 32 deletions b/‎docs/blog/posts/2026/pearson-phi-broken-tweedie.md‎
Lines changed: 76 additions & 32 deletions
diff --git a/‎docs/blog/posts/images/fig_posterior_pairs.png‎
-1.84 KB b/‎docs/blog/posts/images/fig_posterior_pairs.png‎
-1.84 KB
diff --git a/‎docs/blog/posts/images/fig_posterior_pairs_joint.png‎
-39.1 KB b/‎docs/blog/posts/images/fig_posterior_pairs_joint.png‎
-39.1 KB
diff --git a/‎docs/blog/posts/images/fig_ppc_distribution.png‎
-3.56 KB b/‎docs/blog/posts/images/fig_ppc_distribution.png‎
-3.56 KB
diff --git a/‎docs/blog/posts/images/fig_ppc_validation.png‎
-39.7 KB b/‎docs/blog/posts/images/fig_ppc_validation.png‎
-39.7 KB
diff --git a/‎docs/blog/posts/images/fig_profile_likelihood.png‎
-1.69 KB b/‎docs/blog/posts/images/fig_profile_likelihood.png‎
-1.69 KB
diff --git a/‎docs/blog/posts/images/fig_zero_rate_comparison.png‎
-9.83 KB b/‎docs/blog/posts/images/fig_zero_rate_comparison.png‎
-9.83 KB
diff --git a/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/compute_pearson_phi.py‎
Lines changed: 70 additions & 0 deletions b/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/compute_pearson_phi.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_posterior_pairs.py‎
Lines changed: 9 additions & 9 deletions b/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_posterior_pairs.py‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_ppc_distribution.py‎
Lines changed: 1 addition & 1 deletion b/‎docs/blog/posts/scripts/pearson-phi-broken-tweedie/fig_ppc_distribution.py‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,70 @@
+# /// script
+# dependencies = ["numpy", "pandas", "scipy", "requests"]
+# ///
+
+"""Compute Pearson φ for Tweedie GLMs on the dataCar dataset.
+
+Verifies the blog post's dataCar φ(Pearson)=1,227 using the
+exposure-weighted formula:
+  χ² = Σ w_i * (y_i - μ)^2 / μ^p
+  φ = χ² / (n - 1)
+
+where y_i = ClaimAmount_i / Exposure_i and μ is the exposure-weighted mean.
+"""
+
+from pathlib import Path
+import sys
+
+import numpy as np
+import pandas as pd
+
+sys.path.insert(0, str(Path(__file__).parent))
+from tweedie_utils import tweedie_logp_series, tweedie_random
+
+
+def pearson_phi_weighted(y, mu, p, weights):
+    """Weighted Pearson dispersion estimate.
+
+    χ² = Σ w_i * (y_i - μ)^2 / μ^p
+    φ = χ² / (n - 1)
+    """
+    resid_sq = (y - mu) ** 2 / (mu**p)
+    return (weights * resid_sq).sum() / (len(y) - 1)
+
+
+def load_datacar(path="/tmp/dataCar.csv"):
+    df = pd.read_csv(path)
+    claim = df["claimcst0"].values.astype(float)
+    exposure = df["exposure"].values.astype(float)
+    return claim, exposure
+
+
+if __name__ == "__main__":
+    # --- dataCar verification ---
+    claim, exposure = load_datacar()
+    y = claim / exposure
+    n = len(y)
+    mu = np.sum(claim) / np.sum(exposure)
+    zero_rate = np.mean(claim == 0)
+
+    print("=" * 60)
+    print("  dataCar Dataset (67,856 policies)")
+    print("=" * 60)
+    print(f"  Weighted mean (pure premium) = ${mu:.2f}")
+    print(f"  Zero rate                    = {zero_rate:.4f} ({zero_rate*100:.1f}%)")
+    print(f"  Total exposure               = {exposure.sum():,.1f} years")
+
+    # Pearson φ at blog's p=1.574
+    p_blog = 1.574
+    phi_pearson = pearson_phi_weighted(y, mu, p_blog, exposure)
+
+    print(f"\n  At p={p_blog}:")
+    print(f"    φ(Pearson, weighted) = {phi_pearson:.1f}")
+    print(f"    Blog claims          = 1,227")
+    print(f"    Match?               = {'YES ✓' if abs(phi_pearson - 1227) / 1227 < 0.01 else 'NO ✗'}")
+
+    # Pearson φ at various p
+    print(f"\n  φ(Pearson) across power parameter p:")
+    for p in [1.1, 1.2, 1.3, 1.4, 1.5, 1.574, 1.6, 1.7, 1.8, 1.9]:
+        phi_p = pearson_phi_weighted(y, mu, p, exposure)
+        print(f"    p={p:.3f}: φ(Pearson) = {phi_p:,.1f}")
@@ -22,7 +22,7 @@
 datasets = [
     {"name": "dataCar", "mu": 293.0, "phi": 174.0, "p": 1.574,
      "phi_se": 4.5, "p_se": 0.004, "n_chains": 4, "n_draws": 1000},
-    {"name": "French TPL", "mu": 207.0, "phi": 267.0, "p": 1.633,
+    {"name": "High-Inflation", "mu": 218.0, "phi": 800.0, "p": 1.633,
      "phi_se": 8.0, "p_se": 0.006, "n_chains": 4, "n_draws": 1000},
 ]
 
@@ -106,7 +106,7 @@
 print(f"Saved {OUT_DIR / 'fig_posterior_pairs.png'}")
 
 # --- Second figure: (φ, p) joint distribution with 2D density ---
-fig2, axes2 = plt.subplots(1, 2, figsize=(12, 5), constrained_layout=True)
+fig2, axes2 = plt.subplots(1, 2, figsize=(12, 5))
 
 for ax, ds in zip(axes2, datasets):
     name = ds["name"]
@@ -129,7 +129,7 @@
     ax.set_ylabel("p (power)")
     ax.set_title(f"{name}\nCorr(φ, p) = {np.corrcoef(phi_samples, p_samples)[0, 1]:.2f}",
                  fontsize=10)
-    ax.legend(fontsize=8)
+    ax.legend(fontsize=8, loc="upper left")
 
     # Credible ellipse (approx 95%)
     from matplotlib.patches import Ellipse
@@ -145,11 +145,11 @@
     ax.set_xlim(phi_true * 0.7, phi_true * 1.3)
     ax.set_ylim(p_true - 0.02, p_true + 0.02)
 
-plt.suptitle("Joint (φ, p) Posterior Distribution\n"
-             "Tight, well-centered posteriors — no φ-p tradeoff pathology",
-             fontsize=12, y=1.02)
-cbar = plt.colorbar(hb, ax=axes2, shrink=0.6)
+fig2.suptitle("Joint (φ, p) Posterior Distribution\n"
+              "Tight, well-centered posteriors — no φ-p tradeoff pathology",
+              fontsize=12, y=1.02)
+cbar = fig2.colorbar(hb, ax=axes2, shrink=0.6, pad=0.02)
 cbar.set_label("Density")
-plt.savefig(OUT_DIR / "fig_posterior_pairs_joint.png", dpi=150, bbox_inches="tight")
-plt.close()
+fig2.savefig(OUT_DIR / "fig_posterior_pairs_joint.png", dpi=150, bbox_inches="tight")
+plt.close(fig2)
 print(f"Saved {OUT_DIR / 'fig_posterior_pairs_joint.png'}")
@@ -24,7 +24,7 @@
 
 datasets = [
     {"name": "dataCar-like", "mu": 293.0, "phi": 174.0, "p": 1.574, "n": n_obs},
-    {"name": "French TPL-like", "mu": 207.0, "phi": 267.0, "p": 1.633, "n": n_obs},
+    {"name": "High-Inflation-like", "mu": 218.0, "phi": 800.0, "p": 1.633, "n": n_obs},
 ]
 
 fig, axes = plt.subplots(1, 2, figsize=(14, 5))
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@`
`24`	`24`
`25`	`25`	`datasets = [`
`26`	`26`	`{"name": "dataCar-like", "mu": 293.0, "phi": 174.0, "p": 1.574, "n": n_obs},`
`27`		`- {"name": "French TPL-like", "mu": 207.0, "phi": 267.0, "p": 1.633, "n": n_obs},`
	`27`	`+ {"name": "High-Inflation-like", "mu": 218.0, "phi": 800.0, "p": 1.633, "n": n_obs},`
`28`	`28`	`]`
`29`	`29`
`30`	`30`	`fig, axes = plt.subplots(1, 2, figsize=(14, 5))`