@@ -73,6 +73,7 @@ from scipy.integrate import quad
7373from scipy.stats import beta
7474from collections import namedtuple
7575import pandas as pd
76+ import scipy as sp
7677```
7778
7879This lecture uses ideas studied in {doc}`the lecture on likelihood ratio processes <likelihood_ratio_process>` and {doc}`the lecture on Bayesian learning <likelihood_bayes>`.
@@ -327,8 +328,7 @@ f0 = create_beta_density(1, 1)
327328f1 = create_beta_density(9, 9)
328329grid = np.linspace(0, 1, 50)
329330
330- fig, ax = plt.subplots(figsize=(10, 8))
331- ax.set_title("Original Distributions")
331+ fig, ax = plt.subplots()
332332ax.plot(grid, f0(grid), lw=2, label="$f_0$")
333333ax.plot(grid, f1(grid), lw=2, label="$f_1$")
334334ax.legend()
@@ -538,7 +538,10 @@ def run_sprt_simulation(a0, b0, a1, b1, α, β, N, seed):
538538 true_f0 = (i % 2 == 0)
539539 truth_h0[i] = true_f0
540540
541- n, accept_f0 = sprt_single_run(a0, b0, a1, b1, logA, logB, true_f0, seed + i)
541+ n, accept_f0 = sprt_single_run(
542+ a0, b0, a1, b1,
543+ logA, logB,
544+ true_f0, seed + i)
542545 stopping_times[i] = n
543546 decisions_h0[i] = accept_f0
544547
@@ -555,8 +558,10 @@ def run_sprt(params):
555558 truth_h0_bool = truth_h0.astype(bool)
556559 decisions_h0_bool = decisions_h0.astype(bool)
557560
558- type_I = np.sum(truth_h0_bool & ~decisions_h0_bool) / np.sum(truth_h0_bool)
559- type_II = np.sum(~truth_h0_bool & decisions_h0_bool) / np.sum(~truth_h0_bool)
561+ type_I = np.sum(truth_h0_bool & ~decisions_h0_bool) \
562+ / np.sum(truth_h0_bool)
563+ type_II = np.sum(~truth_h0_bool & decisions_h0_bool) \
564+ / np.sum(~truth_h0_bool)
560565
561566 return {
562567 'stopping_times': stopping_times,
@@ -571,8 +576,8 @@ params = SPRTParams(α=0.05, β=0.10, a0=2, b0=5, a1=5, b1=2, N=20000, seed=1)
571576results = run_sprt(params)
572577
573578print(f"Average stopping time: {results['stopping_times'].mean():.2f}")
574- print(f"Empirical type I error: {results['type_I']:.3f} (target = {params.α})")
575- print(f"Empirical type II error: {results['type_II']:.3f} (target = {params.β})")
579+ print(f"Empirical type I error: {results['type_I']:.3f} (target = {params.α})")
580+ print(f"Empirical type II error: {results['type_II']:.3f} (target = {params.β})")
576581```
577582
578583As anticipated in the passage above in which Wald discussed the quality of
@@ -597,7 +602,7 @@ def compute_wald_thresholds(α, β):
597602 return A, B, np.log(A), np.log(B)
598603
599604def plot_sprt_results(results, params, title=""):
600- """Reusable function to plot SPRT results."""
605+ """Plot SPRT results."""
601606 fig, axes = plt.subplots(1, 3, figsize=(20, 6))
602607
603608 # Distribution plots
@@ -656,8 +661,10 @@ def plot_confusion_matrix(results, ax):
656661
657662 for i in range(2):
658663 for j in range(2):
659- percent = confusion_data[i, j] / row_totals[i, 0] if row_totals[i, 0] > 0 else 0
660- color = 'white' if confusion_data[i, j] > confusion_data.max() * 0.5 else 'black'
664+ percent = confusion_data[i, j] / row_totals[i, 0] \
665+ if row_totals[i, 0] > 0 else 0
666+ color = 'white' if confusion_data[i, j] > confusion_data.max() * 0.5 \
667+ else 'black'
661668 ax.text(j, i, f'{confusion_data[i, j]}\n({percent:.1%})',
662669 ha="center", va="center", color=color, fontweight='bold',
663670 fontsize=14)
@@ -765,7 +772,7 @@ for a0, b0, a1, b1 in param_comb:
765772 param_list.append((a0, b0, a1, b1))
766773
767774# Create the plot
768- fig, ax = plt.subplots(figsize=(6, 6) )
775+ fig, ax = plt.subplots()
769776
770777scatter = ax.scatter(js_dists, mean_stopping_times,
771778 s=80, alpha=0.7, linewidth=0.5)
@@ -1009,6 +1016,8 @@ In the two exercises below, please try to rewrite the entire SPRT suite in this
10091016
10101017In the first exercise, we apply the sequential probability ratio test to distinguish two models generated by 3-state Markov chains
10111018
1019+ (For a review of likelihood ratio processes for Markov chains, see [this section](lrp_markov).)
1020+
10121021Consider distinguishing between two 3-state Markov chain models using Wald's sequential probability ratio test.
10131022
10141023You have competing hypotheses about the transition probabilities:
@@ -1049,6 +1058,12 @@ The test stops when:
10491058:class: dropdown
10501059```
10511060
1061+ Below is one solution to the exercise.
1062+
1063+ In the lecture, we write the code more verbosely to illustrate the concepts clearly.
1064+
1065+ In the code below, we simplify some of the code structure for a shorter presentation.
1066+
10521067``` {code-cell} ipython3
10531068MarkovSPRTParams = namedtuple('MarkovSPRTParams',
10541069 ['α', 'β', 'P_0', 'P_1', 'N', 'seed'])
@@ -1076,7 +1091,8 @@ def simulate_markov_chain(P, pi_0, T, seed):
10761091 return path
10771092
10781093@njit
1079- def markov_sprt_single_run(P_0, P_1, pi_0, pi_1, logA, logB, true_P, true_pi, seed):
1094+ def markov_sprt_single_run(P_0, P_1, pi_0, pi_1,
1095+ logA, logB, true_P, true_pi, seed):
10801096 """Run single SPRT for Markov chains."""
10811097 max_n = 10000
10821098 path = simulate_markov_chain(true_P, true_pi, max_n, seed)
@@ -1137,14 +1153,18 @@ P_1 = np.array([[0.5, 0.3, 0.2],
11371153 [0.2, 0.6, 0.2],
11381154 [0.2, 0.2, 0.6]])
11391155
1140- params_markov = MarkovSPRTParams(α=0.05, β=0.10, P_0=P_0, P_1=P_1, N=1000, seed=42)
1156+ params_markov = MarkovSPRTParams(α=0.05, β=0.10,
1157+ P_0=P_0, P_1=P_1, N=1000, seed=42)
11411158results_markov = run_markov_sprt(params_markov)
11421159
11431160plot_confusion_matrix = lambda results, ax: None
11441161fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
11451162
1146- ax1.hist(results_markov['stopping_times'], bins=50, color="steelblue", alpha=0.8)
1147- ax1.set_title("Stopping Times"), ax1.set_xlabel("n"), ax1.set_ylabel("Frequency")
1163+ ax1.hist(results_markov['stopping_times'],
1164+ bins=50, color="steelblue", alpha=0.8)
1165+ ax1.set_title("Stopping Times")
1166+ ax1.set_xlabel("n")
1167+ ax1.set_ylabel("Frequency")
11481168
11491169# Confusion matrix (reusing pattern from lecture)
11501170f0_c = np.sum(results_markov['truth_h0'] & results_markov['decisions_h0'])
@@ -1154,20 +1174,23 @@ f1_i = np.sum(~results_markov['truth_h0'] & results_markov['decisions_h0'])
11541174
11551175confusion_data = np.array([[f0_c, f0_i], [f1_i, f1_c]])
11561176ax2.imshow(confusion_data, cmap='Blues')
1157- ax .set_title('Confusion Matrix')
1158- ax .set_xticks([0, 1])
1159- ax .set_xticklabels(['Accept $H_0$', 'Reject $H_0$'])
1160- ax .set_yticks([0, 1])
1161- ax .set_yticklabels(['True $P^{(0)}$', 'True $P^{(1)}$'])
1177+ ax2 .set_title('Confusion Matrix')
1178+ ax2 .set_xticks([0, 1])
1179+ ax2 .set_xticklabels(['Accept $H_0$', 'Reject $H_0$'])
1180+ ax2 .set_yticks([0, 1])
1181+ ax2 .set_yticklabels(['True $P^{(0)}$', 'True $P^{(1)}$'])
11621182
11631183row_totals = confusion_data.sum(axis=1, keepdims=True)
11641184
11651185for i in range(2):
11661186 for j in range(2):
1167- percent = confusion_data[i, j] / row_totals[i, 0] if row_totals[i, 0] > 0 else 0
1168- color = 'white' if confusion_data[i, j] > confusion_data.max() * 0.5 else 'black'
1169- ax.text(j, i, f'{confusion_data[i, j]}\n({percent:.1%})',
1170- ha="center", va="center", color=color, fontweight='bold')
1187+ percent = confusion_data[i, j] / row_totals[i, 0] \
1188+ if row_totals[i, 0] > 0 else 0
1189+ color = 'white' if confusion_data[i, j] > confusion_data.max() * 0.5 \
1190+ else 'black'
1191+ ax2.text(j, i, f'{confusion_data[i, j]}\n({percent:.1%})',
1192+ ha="center", va="center", color=color, fontweight='bold',
1193+ fontsize=14)
11711194
11721195plt.tight_layout()
11731196plt.show()
@@ -1182,6 +1205,8 @@ plt.show()
11821205
11831206In this exercise, apply Wald's sequential test to distinguish between two VAR(1) models with different dynamics and noise structures.
11841207
1208+ For a review of the likelihood ratio process with VAR models, see {doc}`likelihood_var`.
1209+
11851210Given VAR models under each hypothesis:
11861211- $H_0$: $x_{t+1} = A^{(0)} x_t + C^{(0)} w_{t+1}$
11871212- $H_1$: $x_{t+1} = A^{(1)} x_t + C^{(1)} w_{t+1}$
@@ -1210,17 +1235,17 @@ Tasks:
12101235:class: dropdown
12111236```
12121237
1213- ``` {code-cell} ipython3
1214- import scipy as sc
1238+ Below is one solution to the exercise.
12151239
1240+ ``` {code-cell} ipython3
12161241VARSPRTParams = namedtuple('VARSPRTParams',
12171242 ['α', 'β', 'A_0', 'C_0', 'A_1', 'C_1', 'N', 'seed'])
12181243
12191244def create_var_model(A, C):
12201245 """Create VAR model."""
12211246 μ_0 = np.zeros(A.shape[0])
12221247 CC = C @ C.T
1223- Σ_0 = sc .linalg.solve_discrete_lyapunov(A, CC)
1248+ Σ_0 = sp .linalg.solve_discrete_lyapunov(A, CC)
12241249
12251250 CC_inv = np.linalg.inv(CC + 1e-10 * np.eye(CC.shape[0]))
12261251 Σ_0_inv = np.linalg.inv(Σ_0 + 1e-10 * np.eye(Σ_0.shape[0]))
@@ -1246,14 +1271,16 @@ def var_log_likelihood(x_curr, x_prev, model, initial=False):
12461271 return -0.5 * (n * np.log(2 * np.pi) + model['log_det_CC'] +
12471272 diff @ model['CC_inv'] @ diff)
12481273
1249- def var_sprt_single_run(model_0, model_1, model_true, logA, logB, seed):
1250- """Single VAR SPRT run"""
1274+ def var_sprt_single_run(model_0, model_1, model_true,
1275+ logA, logB, seed):
1276+ """Single VAR SPRT run."""
12511277 np.random.seed(seed)
12521278 max_T = 500
12531279
12541280 # Generate VAR path
12551281 Σ_chol = np.linalg.cholesky(model_true['Σ_0'])
1256- x = model_true['μ_0'] + Σ_chol @ np.random.randn(len(model_true['μ_0']))
1282+ x = model_true['μ_0'] + Σ_chol @ np.random.randn(
1283+ len(model_true['μ_0']))
12571284
12581285 # Initial likelihood ratio
12591286 log_L = (var_log_likelihood(x, None, model_1, True) -
@@ -1299,8 +1326,10 @@ def run_var_sprt(params):
12991326 type_I = np.sum(truth_h0 & ~decisions_h0) / np.sum(truth_h0)
13001327 type_II = np.sum(~truth_h0 & decisions_h0) / np.sum(~truth_h0)
13011328
1302- return {'stopping_times': stopping_times, 'decisions_h0': decisions_h0,
1303- 'truth_h0': truth_h0, 'type_I': type_I, 'type_II': type_II}
1329+ return {'stopping_times': stopping_times,
1330+ 'decisions_h0': decisions_h0,
1331+ 'truth_h0': truth_h0,
1332+ 'type_I': type_I, 'type_II': type_II}
13041333
13051334# Run VAR SPRT
13061335A_0 = np.array([[0.8, 0.1],
@@ -1329,8 +1358,8 @@ ax2.bar(x - 0.2, [results_markov['type_I'], results_var['type_I']],
13291358 0.4, label='Type I', alpha=0.7)
13301359ax2.bar(x + 0.2, [results_markov['type_II'], results_var['type_II']],
13311360 0.4, label='Type II', alpha=0.7)
1332- ax2.axhline(y=0.05, linestyle='--', alpha=0.5)
1333- ax2.axhline(y=0.10, linestyle='--', alpha=0.5)
1361+ ax2.axhline(y=0.05, linestyle='--', alpha=0.5, color='C0' )
1362+ ax2.axhline(y=0.10, linestyle='--', alpha=0.5, color='C1' )
13341363ax2.set_xticks(x), ax2.set_xticklabels(['Markov', 'VAR'])
13351364ax2.legend(), plt.tight_layout(), plt.show()
13361365```
0 commit comments